From 3f3188a430e0b25d550600aca076b6703c6688fb Mon Sep 17 00:00:00 2001 From: krasimir Date: Fri, 30 May 2008 13:07:11 +0000 Subject: [PATCH] cleaned up and documented PGF API --- src-3.0/GF/Command/Commands.hs | 10 +- src-3.0/GF/Command/Importing.hs | 2 +- src-3.0/GF/Command/Interpreter.hs | 6 +- src-3.0/GFI.hs | 6 +- src-3.0/PGF.hs | 170 +++++++++++++++++++++--------- src-3.0/PGF/CId.hs | 4 + src-3.0/PGF/Data.hs | 2 + src-3.0/PGF/Parsing/FCFG.hs | 23 ++-- src-3.0/PGF/Raw/Convert.hs | 1 - 9 files changed, 153 insertions(+), 71 deletions(-) diff --git a/src-3.0/GF/Command/Commands.hs b/src-3.0/GF/Command/Commands.hs index f3789d669..8761234cd 100644 --- a/src-3.0/GF/Command/Commands.hs +++ b/src-3.0/GF/Command/Commands.hs @@ -9,7 +9,7 @@ module GF.Command.Commands ( CommandOutput ) where -import GF.Command.AbsGFShell hiding (Tree) +import GF.Command.AbsGFShell import GF.Command.PPrTree import GF.Command.ParGFShell import PGF @@ -23,10 +23,10 @@ import GF.Data.ErrM ---- import qualified Data.Map as Map -type CommandOutput = ([Tree],String) ---- errors, etc +type CommandOutput = ([Exp],String) ---- errors, etc data CommandInfo = CommandInfo { - exec :: [Option] -> [Tree] -> IO CommandOutput, + exec :: [Option] -> [Exp] -> IO CommandOutput, synopsis :: String, explanation :: String, longname :: String, @@ -106,7 +106,7 @@ allCommands pgf = Map.fromAscList [ synopsis = "get description of a command, or a the full list of commands", options = ["full"], exec = \opts ts -> return ([], case ts of - [t] -> let co = (showTree t) in + [t] -> let co = showExp t in case lookCommand co (allCommands pgf) of ---- new map ??!! Just info -> commandHelp True (co,info) _ -> "command not found" @@ -146,7 +146,7 @@ allCommands pgf = Map.fromAscList [ optNum opts = valIntOpts "number" 1 opts optNumInf opts = valIntOpts "number" 1000000000 opts ---- 10^9 - fromTrees ts = (ts,unlines (map showTree ts)) + fromTrees ts = (ts,unlines (map showExp ts)) fromStrings ss = (map EStr ss, unlines ss) fromString s = ([EStr s], s) toStrings ts = [s | EStr s <- ts] diff --git a/src-3.0/GF/Command/Importing.hs b/src-3.0/GF/Command/Importing.hs index d4eeb18ce..48f07969d 100644 --- a/src-3.0/GF/Command/Importing.hs +++ b/src-3.0/GF/Command/Importing.hs @@ -23,7 +23,7 @@ importGrammar pgf0 opts files = Bad msg -> do putStrLn msg return pgf0 ".pgf" -> do - pgf2 <- mapM file2pgf files >>= return . foldl1 unionPGF + pgf2 <- mapM readPGF files >>= return . foldl1 unionPGF return $ unionPGF pgf0 pgf2 importSource :: SourceGrammar -> Options -> [FilePath] -> IO SourceGrammar diff --git a/src-3.0/GF/Command/Interpreter.hs b/src-3.0/GF/Command/Interpreter.hs index c33d6453a..a5da51f7e 100644 --- a/src-3.0/GF/Command/Interpreter.hs +++ b/src-3.0/GF/Command/Interpreter.hs @@ -4,7 +4,7 @@ module GF.Command.Interpreter ( ) where import GF.Command.Commands -import GF.Command.AbsGFShell hiding (Tree) +import GF.Command.AbsGFShell import GF.Command.PPrTree import GF.Command.ParGFShell import PGF @@ -40,7 +40,7 @@ interpretCommandLine env line = case (pCommandLine (myLexer line)) of interc = interpret env -- return the trees to be sent in pipe, and the output possibly printed -interpret :: CommandEnv -> [Tree] -> Command -> IO CommandOutput +interpret :: CommandEnv -> [Exp] -> Command -> IO CommandOutput interpret env trees0 comm = case lookCommand co comms of Just info -> do checkOpts info @@ -64,7 +64,7 @@ interpret env trees0 comm = case lookCommand co comms of os -> putStrLn $ "options not interpreted: " ++ unwords os -- analyse command parse tree to a uniform datastructure, normalizing comm name -getCommand :: Command -> [Tree] -> (String,[Option],[Tree]) +getCommand :: Command -> [Exp] -> (String,[Option],[Exp]) getCommand co ts = case co of Comm (Ident c) opts (ATree t) -> (getOp c,opts,[tree2exp t]) -- ignore piped CNoarg (Ident c) opts -> (getOp c,opts,ts) -- use piped diff --git a/src-3.0/GFI.hs b/src-3.0/GFI.hs index 24de6c70c..9c38c69b0 100644 --- a/src-3.0/GFI.hs +++ b/src-3.0/GFI.hs @@ -9,6 +9,7 @@ import GF.Infra.UseIO import GF.Infra.Option import GF.System.Readline (fetchCommand) import PGF +import PGF.Data import System.CPUTime @@ -34,8 +35,9 @@ loop gfenv0 = do "cc":ws -> do -- FIXME: add options parsing for cc arguments let (opts,term) = (TermPrintDefault, ws) - let t = pTerm (unwords term) >>= checkTerm sgr >>= computeTerm sgr - err putStrLn (putStrLn . showTerm opts) t ---- make pipable + case pTerm (unwords term) >>= checkTerm sgr >>= computeTerm sgr of ---- make pipable + Ok x -> putStrLn (showTerm opts x) + Bad s -> putStrLn s loopNewCPU gfenv "i":args -> do case parseOptions args of diff --git a/src-3.0/PGF.hs b/src-3.0/PGF.hs index 9e0a6007e..fafbfafcf 100644 --- a/src-3.0/PGF.hs +++ b/src-3.0/PGF.hs @@ -1,28 +1,54 @@ ----------------------------------------------------------------------- +------------------------------------------------- -- | --- Module : GFCCAPI +-- Module : PGF -- Maintainer : Aarne Ranta --- Stability : (stable) --- Portability : (portable) +-- Stability : stable +-- Portability : portable -- --- > CVS $Date: --- > CVS $Author: --- > CVS $Revision: --- --- Reduced Application Programmer's Interface to GF, meant for --- embedded GF systems. AR 19/9/2007 ------------------------------------------------------------------------------ +-- Application Programming Interface to PGF. +------------------------------------------------- -module PGF(module PGF, PGF, emptyPGF) where +module PGF( + -- * PGF + PGF, + readPGF, + + -- * Identifiers + -- ** CId + CId, mkCId, prCId, + + -- ** Language + Language, languages, abstractName, + + -- ** Category + Category, categories, startCat, + + -- * Expressions + Exp(..), + showExp, readExp, + + -- * Operations + -- ** Linearization + linearize, linearizeAllLang, linearizeAll, + + -- ** Parsing + parse, parseAllLang, parseAll, + + -- ** Generation + generateRandom, generateAll, generateAllDepth + ) where import PGF.CId -import PGF.Linearize +import PGF.Linearize hiding (linearize) +import qualified PGF.Linearize (linearize) import PGF.Generate import PGF.Macros import PGF.Data import PGF.Raw.Convert import PGF.Raw.Parse +import PGF.Raw.Print (printTree) import PGF.Parsing.FCFG +import GF.Text.UTF8 import GF.Data.ErrM @@ -37,45 +63,105 @@ import qualified Text.ParserCombinators.ReadP as RP -- This API is meant to be used when embedding GF grammars in Haskell -- programs. The embedded system is supposed to use the --- .gfcc grammar format, which is first produced by the gf program. +-- .pgf grammar format, which is first produced by the gf program. --------------------------------------------------- -- Interface --------------------------------------------------- +-- | This is just a string with the language name. +-- A language name is the identifier that you write in the +-- top concrete or abstract module in GF after the +-- concrete/abstract keyword. Example: +-- +-- > abstract Lang = ... +-- > concrete LangEng of Lang = ... type Language = String + +-- | This is just a string with the category name. +-- The categories are defined in the abstract syntax +-- with the \'cat\' keyword. type Category = String -type Tree = Exp -file2pgf :: FilePath -> IO PGF +-- | Reads file in Portable Grammar Format and produces +-- 'PGF' structure. The file is usually produced with: +-- +-- > $ gfc --make +readPGF :: FilePath -> IO PGF -linearize :: PGF -> Language -> Tree -> String -parse :: PGF -> Language -> Category -> String -> [Tree] +-- | Linearizes given expression as string in the language +linearize :: PGF -> Language -> Exp -> String -linearizeAll :: PGF -> Tree -> [String] -linearizeAllLang :: PGF -> Tree -> [(Language,String)] +-- | Tries to parse the given string in the specified language +-- and to produce abstract syntax expression. An empty +-- list is returned if the parsing is not successful. The list may also +-- contain more than one element if the grammar is ambiguous. +parse :: PGF -> Language -> Category -> String -> [Exp] -parseAll :: PGF -> Category -> String -> [[Tree]] -parseAllLang :: PGF -> Category -> String -> [(Language,[Tree])] +-- | The same as 'linearizeAllLang' but does not return +-- the language. +linearizeAll :: PGF -> Exp -> [String] -generateAll :: PGF -> Category -> [Tree] -generateRandom :: PGF -> Category -> IO [Tree] -generateAllDepth :: PGF -> Category -> Maybe Int -> [Tree] +-- | Linearizes given expression as string in all languages +-- available in the grammar. +linearizeAllLang :: PGF -> Exp -> [(Language,String)] -readTree :: String -> Tree -showTree :: Tree -> String +-- | The same as 'parseAllLang' but does not return +-- the language. +parseAll :: PGF -> Category -> String -> [[Exp]] -languages :: PGF -> [Language] +-- | Tries to parse the given string with every language +-- available in the grammar and to produce abstract syntax +-- expression. The returned list contains pairs of language +-- and list of possible expressions. Only those languages +-- for which at least one parsing is possible are listed. +-- More than one abstract syntax expressions are possible +-- if the grammar is ambiguous. +parseAllLang :: PGF -> Category -> String -> [(Language,[Exp])] + +-- | The same as 'generateAllDepth' but does not limit +-- the depth in the generation. +generateAll :: PGF -> Category -> [Exp] + +-- | Generates an infinite list of random abstract syntax expressions. +-- This is usefull for tree bank generation which after that can be used +-- for grammar testing. +generateRandom :: PGF -> Category -> IO [Exp] + +-- | Generates an exhaustive possibly infinite list of +-- abstract syntax expressions. A depth can be specified +-- to limit the search space. +generateAllDepth :: PGF -> Category -> Maybe Int -> [Exp] + +-- | parses 'String' as an expression +readExp :: String -> Maybe Exp + +-- | renders expression as 'String' +showExp :: Exp -> String + +-- | List of all languages available in the given grammar. +languages :: PGF -> [Language] + +-- | The abstract language name is the name of the top-level +-- abstract module +abstractName :: PGF -> Language + +-- | List of all categories defined in the given grammar. categories :: PGF -> [Category] +-- | The start category is defined in the grammar with +-- the \'startcat\' flag. This is usually the sentence category +-- but it is not necessary. Despite that there is a start category +-- defined you can parse with any category. The start category +-- definition is just for convenience. startCat :: PGF -> Category --------------------------------------------------- -- Implementation --------------------------------------------------- -file2pgf f = do - s <- readFileIf f +readPGF f = do + s <- readFile f g <- parseGrammar s return $! toPGF g @@ -83,9 +169,9 @@ linearize pgf lang = PGF.Linearize.linearize pgf (mkCId lang) parse pgf lang cat s = case lookParser pgf (mkCId lang) of - Nothing -> error "no parser" + Nothing -> error ("Unknown language: " ++ lang) Just pinfo -> case parseFCF "bottomup" pinfo (mkCId cat) (words s) of - Ok x -> x + Ok x -> x Bad s -> error s linearizeAll mgr = map snd . linearizeAllLang mgr @@ -104,9 +190,9 @@ generateRandom pgf cat = do generateAll pgf cat = generate pgf (mkCId cat) Nothing generateAllDepth pgf cat = generate pgf (mkCId cat) -readTree s = case RP.readP_to_S (pExp False) s of - [(x,"")] -> x - _ -> error "no parse" +readExp s = case RP.readP_to_S (pExp False) s of + [(x,"")] -> Just x + _ -> Nothing pExps :: RP.ReadP [Exp] pExps = liftM2 (:) (pExp True) pExps RP.<++ (RP.skipSpaces >> return []) @@ -136,7 +222,7 @@ pExp isNested = RP.skipSpaces >> (pParen RP.<++ pAbs RP.<++ pApp RP.<++ pNum RP. isIdentRest c = c == '_' || c == '\'' || isAlphaNum c -showTree = PP.render . ppExp False +showExp = PP.render . ppExp False ppExp isNested (EAbs xs t) = ppParens isNested (PP.char '\\' PP.<> PP.hsep (PP.punctuate PP.comma (map (PP.text . prCId) xs)) PP.<+> @@ -160,15 +246,3 @@ languages pgf = [prCId l | l <- cncnames pgf] categories pgf = [prCId c | c <- Map.keys (cats (abstract pgf))] startCat pgf = lookStartCat pgf - - ------------- for internal use only - -err f g ex = case ex of - Ok x -> g x - Bad s -> f s - -readFileIf f = do - b <- doesFileExist f - if b then readFile f - else putStrLn ("file " ++ f ++ " not found") >> return "" diff --git a/src-3.0/PGF/CId.hs b/src-3.0/PGF/CId.hs index 8853d3d5b..161529308 100644 --- a/src-3.0/PGF/CId.hs +++ b/src-3.0/PGF/CId.hs @@ -2,13 +2,17 @@ module PGF.CId (CId(..), wildCId, mkCId, prCId) where import Data.ByteString.Char8 as BS +-- | An abstract data type that represents +-- function identifier in PGF. newtype CId = CId BS.ByteString deriving (Eq,Ord,Show) wildCId :: CId wildCId = CId (BS.singleton '_') +-- | Creates a new identifier from 'String' mkCId :: String -> CId mkCId s = CId (BS.pack s) +-- | Renders the identifier as 'String' prCId :: CId -> String prCId (CId x) = BS.unpack x diff --git a/src-3.0/PGF/Data.hs b/src-3.0/PGF/Data.hs index 8c836c893..95ea2e1a7 100644 --- a/src-3.0/PGF/Data.hs +++ b/src-3.0/PGF/Data.hs @@ -10,6 +10,8 @@ import Data.Array -- internal datatypes for PGF +-- | An abstract data type representing multilingual grammar +-- in Portable Grammar Format. data PGF = PGF { absname :: CId , cncnames :: [CId] , diff --git a/src-3.0/PGF/Parsing/FCFG.hs b/src-3.0/PGF/Parsing/FCFG.hs index 81fc6a3e4..a7453fff8 100644 --- a/src-3.0/PGF/Parsing/FCFG.hs +++ b/src-3.0/PGF/Parsing/FCFG.hs @@ -28,16 +28,17 @@ import qualified Data.Map as Map -- main parsing function -parseFCF :: - String -> -- ^ parsing strategy - ParserInfo -> -- ^ compiled grammar (fcfg) - CId -> -- ^ starting category - [String] -> -- ^ input tokens - Err [Exp] -- ^ resulting GF terms +parseFCF :: String -- ^ parsing strategy + -> ParserInfo -- ^ compiled grammar (fcfg) + -> CId -- ^ starting category + -> [String] -- ^ input tokens + -> Err [Exp] -- ^ resulting GF terms parseFCF strategy pinfo startCat inString = do let inTokens = input inString - startCats <- Map.lookup startCat (startupCats pinfo) - fcfParser <- {- trace lctree $ -} parseFCF strategy + startCats <- case Map.lookup startCat (startupCats pinfo) of + Just cats -> return cats + Nothing -> fail $ "Unknown startup category: " ++ prCId startCat + fcfParser <- parseFCF strategy let chart = fcfParser pinfo startCats inTokens (i,j) = inputBounds inTokens finalEdges = [makeFinalEdge cat i j | cat <- startCats] @@ -46,6 +47,6 @@ parseFCF strategy pinfo startCat inString = return $ nubsort $ filteredForests >>= forest2exps where parseFCF :: String -> Err (FCFParser) - parseFCF "bottomup" = Ok $ parse "b" - parseFCF "topdown" = Ok $ parse "t" - parseFCF strat = Bad $ "FCFG parsing strategy not defined: " ++ strat + parseFCF "bottomup" = return $ parse "b" + parseFCF "topdown" = return $ parse "t" + parseFCF strat = fail $ "FCFG parsing strategy not defined: " ++ strat diff --git a/src-3.0/PGF/Raw/Convert.hs b/src-3.0/PGF/Raw/Convert.hs index 3caa07aec..a8398093b 100644 --- a/src-3.0/PGF/Raw/Convert.hs +++ b/src-3.0/PGF/Raw/Convert.hs @@ -181,7 +181,6 @@ fromExp e = case e of EMeta _ -> AMet ---- EEq eqs -> App "Eq" [App "E" (map fromExp (v:ps)) | Equ ps v <- eqs] - _ -> error $ "exp " ++ show e fromTerm :: Term -> RExp fromTerm e = case e of