-------------------------------------------------
-- |
-- Module      : PGF
-- Maintainer  : Aarne Ranta
-- Stability   : stable
-- Portability : portable
--
-- This module is an Application Programming Interface to
-- load and interpret grammars compiled in Portable Grammar Format (PGF).
-- The PGF format is produced as a final output from the GF compiler.
-- The API is meant to be used for embedding GF grammars in Haskell
-- programs.
-------------------------------------------------

module PGF(
           -- * PGF
           PGF,
           readPGF,

           -- * Identifiers
           -- ** CId
           CId, mkCId, prCId,

           -- ** Language
           Language, languages, abstractName,

           -- ** Category
           Category, categories, startCat,

           -- * Expressions
           -- ** Tree
           Tree(..), Literal(..),
           showTree, readTree,

           -- ** Expr
           Expr(..), Equation(..),
           showExpr, readExpr,

           -- * Operations
           -- ** Linearization
           linearize, linearizeAllLang, linearizeAll,

           -- ** Parsing
           parse, parseAllLang, parseAll,

           -- ** Evaluation
           tree2expr, expr2tree,

           -- ** Word Completion (Incremental Parsing)
           Incremental.ParseState,
           initState, Incremental.nextState, Incremental.getCompletions, extractExps,

           -- ** Generation
           generateRandom, generateAll, generateAllDepth
          ) where

import PGF.CId
import PGF.Linearize
import PGF.Generate
import PGF.Macros
import PGF.Data
import PGF.Expr
import PGF.Raw.Convert
import PGF.Raw.Parse
import PGF.Raw.Print (printTree)
import PGF.Parsing.FCFG
import qualified PGF.Parsing.FCFG.Incremental as Incremental
import GF.Text.UTF8

import GF.Data.ErrM

import qualified Data.Map as Map
import System.Random (newStdGen)

---------------------------------------------------
-- Interface
---------------------------------------------------

-- | This is just a string with the language name.
-- A language name is the identifier that you write in the
-- top concrete or abstract module in GF after the
-- concrete/abstract keyword. Example:
--
-- > abstract Lang = ...
-- > concrete LangEng of Lang = ...
type Language = String

-- | This is just a string with the category name.
-- The categories are defined in the abstract syntax
-- with the \'cat\' keyword.
type Category = String

-- | Reads file in Portable Grammar Format and produces
-- 'PGF' structure. The file is usually produced with:
--
-- > $ gfc --make
readPGF :: FilePath -> IO PGF

-- | Linearizes given expression as string in the language.
-- Only the first available linearization is returned; the result
-- is the empty string when there is none.
linearize :: PGF -> Language -> Tree -> String

-- | Tries to parse the given string in the specified language
-- and to produce abstract syntax expression. The input is split
-- into tokens with 'words'. The list may contain more than one
-- element if the grammar is ambiguous.
--
-- This function calls 'error' when the language is not part of the
-- grammar, when no parser has been built for the language, or when
-- the bottom-up parser reports a failure.
parse :: PGF -> Language -> Category -> String -> [Tree]

-- | The same as 'linearizeAllLang' but does not return
-- the language.
linearizeAll :: PGF -> Tree -> [String]

-- | Linearizes given expression as string in all languages
-- available in the grammar.
linearizeAllLang :: PGF -> Tree -> [(Language,String)]

-- | The same as 'parseAllLang' but does not return
-- the language.
parseAll :: PGF -> Category -> String -> [[Tree]]

-- | Tries to parse the given string with every language
-- available in the grammar and to produce abstract syntax
-- expression. The returned list contains pairs of language
-- and list of possible expressions. Only those languages
-- for which at least one parsing is possible are listed.
-- More than one abstract syntax expression is possible
-- if the grammar is ambiguous.
parseAllLang :: PGF -> Category -> String -> [(Language,[Tree])]

-- | Creates an initial parsing state for a given language and
-- startup category. Calls 'error' when no parser can be found
-- for the language.
initState :: PGF -> Language -> Category -> Incremental.ParseState

-- | This function extracts the list of all completed parse trees
-- that span the whole input consumed so far. The trees are also
-- limited by the category specified, which is usually
-- the same as the startup category.
extractExps :: Incremental.ParseState -> Category -> [Tree]

-- | The same as 'generateAllDepth' but does not limit
-- the depth in the generation.
generateAll :: PGF -> Category -> [Tree]

-- | Generates an infinite list of random abstract syntax expressions.
-- This is useful for tree bank generation, which can then be used
-- for grammar testing.
generateRandom :: PGF -> Category -> IO [Tree]

-- | Generates an exhaustive possibly infinite list of
-- abstract syntax expressions. A depth can be specified
-- to limit the search space.
generateAllDepth :: PGF -> Category -> Maybe Int -> [Tree]

-- | List of all languages available in the given grammar.
languages :: PGF -> [Language]

-- | The abstract language name is the name of the top-level
-- abstract module.
abstractName :: PGF -> Language

-- | List of all categories defined in the given grammar.
categories :: PGF -> [Category]

-- | The start category is defined in the grammar with
-- the \'startcat\' flag. This is usually the sentence category
-- but it does not have to be. Even though a start category is
-- defined, you can parse with any category. The start category
-- definition is just for convenience.
startCat :: PGF -> Category

---------------------------------------------------
-- Implementation
---------------------------------------------------

readPGF f = do
  s <- readFile f
  g <- parseGrammar s
  -- Force the conversion before returning the grammar.
  return $! toPGF g

-- 'take 1' keeps only the first linearization; 'concat' turns the
-- resulting zero-or-one element list into a plain string.
linearize pgf lang = concat . take 1 . PGF.Linearize.linearizes pgf (mkCId lang)

parse pgf lang cat s =
  case Map.lookup (mkCId lang) (concretes pgf) of
    Just cnc -> case parser cnc of
                  Just pinfo
                    -- Grammars flagged with erasing=on go through the
                    -- incremental parser, all others through the
                    -- bottom-up FCFG parser.
                    | Map.lookup (mkCId "erasing") (cflags cnc) == Just "on"
                                -> Incremental.parse pinfo (mkCId cat) (words s)
                    | otherwise -> case parseFCFG "bottomup" pinfo (mkCId cat) (words s) of
                                     Ok x    -> x
                                     Bad msg -> error msg
                  Nothing -> error ("No parser built for language: " ++ lang)
    Nothing  -> error ("Unknown language: " ++ lang)

linearizeAll mgr = map snd . linearizeAllLang mgr

linearizeAllLang mgr t =
  [(lang,PGF.linearize mgr lang t) | lang <- languages mgr]

parseAll mgr cat = map snd . parseAllLang mgr cat

-- Languages whose parse yields no trees are dropped from the result.
parseAllLang mgr cat s =
  [(lang,ts) | lang <- languages mgr, let ts = parse mgr lang cat s, not (null ts)]

initState pgf lang cat =
  case lookParser pgf langCId of
    Just pinfo -> Incremental.initState pinfo catCId
    -- NOTE(review): this branch may also be reached for a known
    -- language without a parser, depending on 'lookParser' - confirm.
    _          -> error ("Unknown language: " ++ lang)
  where
    langCId = mkCId lang
    catCId  = mkCId cat

extractExps state cat = Incremental.extractExps state (mkCId cat)

generateRandom pgf cat = do
  gen <- newStdGen
  return $ genRandom gen pgf (mkCId cat)

generateAll pgf cat = generate pgf (mkCId cat) Nothing

generateAllDepth pgf cat = generate pgf (mkCId cat)

abstractName pgf = prCId (absname pgf)

languages pgf = [prCId l | l <- cncnames pgf]

categories pgf = [prCId c | c <- Map.keys (cats (abstract pgf))]

startCat pgf = lookStartCat pgf