module PGF.Data where import PGF.CId import GF.Text.UTF8 import GF.Data.Assoc import qualified Data.Map as Map import Data.List import Data.Array -- internal datatypes for PGF -- | An abstract data type representing multilingual grammar -- in Portable Grammar Format. data PGF = PGF { absname :: CId , cncnames :: [CId] , gflags :: Map.Map CId String, -- value of a global flag abstract :: Abstr , concretes :: Map.Map CId Concr } data Abstr = Abstr { aflags :: Map.Map CId String, -- value of a flag funs :: Map.Map CId (Type,Expr), -- type and def of a fun cats :: Map.Map CId [Hypo], -- context of a cat catfuns :: Map.Map CId [CId] -- funs to a cat (redundant, for fast lookup) } data Concr = Concr { cflags :: Map.Map CId String, -- value of a flag lins :: Map.Map CId Term, -- lin of a fun opers :: Map.Map CId Term, -- oper generated by subex elim lincats :: Map.Map CId Term, -- lin type of a cat lindefs :: Map.Map CId Term, -- lin default of a cat printnames :: Map.Map CId Term, -- printname of a cat or a fun paramlincats :: Map.Map CId Term, -- lin type of cat, with printable param names parser :: Maybe ParserInfo -- parser } data Type = DTyp [Hypo] CId [Expr] deriving (Eq,Ord,Show) data Literal = LStr String -- ^ string constant | LInt Integer -- ^ integer constant | LFlt Double -- ^ floating point constant deriving (Eq,Ord,Show) -- | The tree is an evaluated expression in the abstract syntax -- of the grammar. The type is especially restricted to not -- allow unapplied lambda abstractions. The tree is used directly -- from the linearizer and is produced directly from the parser. data Tree = Abs [CId] Tree -- ^ lambda abstraction. The list of variables is non-empty | Var CId -- ^ variable | Fun CId [Tree] -- ^ function application | Lit Literal -- ^ literal | Meta Int -- ^ meta variable deriving (Show, Eq, Ord) -- | An expression represents a potentially unevaluated expression -- in the abstract syntax of the grammar. It can be evaluated with -- the 'expr2tree' function and then linearized or it can be used -- directly in the dependent types. data Expr = EAbs CId Expr -- ^ lambda abstraction | EApp Expr Expr -- ^ application | ELit Literal -- ^ literal | EMeta Int -- ^ meta variable | EVar CId -- ^ variable or function reference | EEq [Equation] -- ^ lambda function defined as a set of equations with pattern matching deriving (Eq,Ord,Show) data Term = R [Term] | P Term Term | S [Term] | K Tokn | V Int | C Int | F CId | FV [Term] | W String Term | TM String deriving (Eq,Ord,Show) data Tokn = KS String | KP [String] [Alternative] deriving (Eq,Ord,Show) data Alternative = Alt [String] [String] deriving (Eq,Ord,Show) data Hypo = Hyp CId Type deriving (Eq,Ord,Show) -- | The equation is used to define lambda function as a sequence -- of equations with pattern matching. The list of 'Expr' represents -- the patterns and the second 'Expr' is the function body for this -- equation. data Equation = Equ [Expr] Expr deriving (Eq,Ord,Show) type FToken = String type FCat = Int type FIndex = Int data FSymbol = FSymCat {-# UNPACK #-} !FIndex {-# UNPACK #-} !Int | FSymTok FToken type Profile = [Int] type FPointPos = Int type FGrammar = ([FRule], Map.Map CId [FCat]) data FRule = FRule CId [Profile] [FCat] FCat (Array FIndex (Array FPointPos FSymbol)) type RuleId = Int data ParserInfo = ParserInfo { allRules :: Array RuleId FRule , topdownRules :: Assoc FCat [RuleId] -- ^ used in 'GF.Parsing.MCFG.Active' (Earley): -- , emptyRules :: [RuleId] , epsilonRules :: [RuleId] -- ^ used in 'GF.Parsing.MCFG.Active' (Kilbury): , leftcornerCats :: Assoc FCat [RuleId] , leftcornerTokens :: Assoc FToken [RuleId] -- ^ used in 'GF.Parsing.MCFG.Active' (Kilbury): , grammarCats :: [FCat] , grammarToks :: [FToken] , startupCats :: Map.Map CId [FCat] } fcatString, fcatInt, fcatFloat, fcatVar :: Int fcatString = (-1) fcatInt = (-2) fcatFloat = (-3) fcatVar = (-4) -- print statistics statGFCC :: PGF -> String statGFCC pgf = unlines [ "Abstract\t" ++ prCId (absname pgf), "Concretes\t" ++ unwords (map prCId (cncnames pgf)), "Categories\t" ++ unwords (map prCId (Map.keys (cats (abstract pgf)))) ] -- merge two GFCCs; fails is differens absnames; priority to second arg unionPGF :: PGF -> PGF -> PGF unionPGF one two = case absname one of n | n == wildCId -> two -- extending empty grammar | n == absname two -> one { -- extending grammar with same abstract concretes = Map.union (concretes two) (concretes one), cncnames = union (cncnames two) (cncnames one) } _ -> one -- abstracts don't match ---- print error msg emptyPGF :: PGF emptyPGF = PGF { absname = wildCId, cncnames = [] , gflags = Map.empty, abstract = error "empty grammar, no abstract", concretes = Map.empty } -- encode idenfifiers and strings in UTF8 utf8GFCC :: PGF -> PGF utf8GFCC pgf = pgf { concretes = Map.map u8concr (concretes pgf) } where u8concr cnc = cnc { lins = Map.map u8term (lins cnc), opers = Map.map u8term (opers cnc) } u8term = convertStringsInTerm encodeUTF8 ---- TODO: convert identifiers and flags convertStringsInTerm conv t = case t of K (KS s) -> K (KS (conv s)) W s r -> W (conv s) (convs r) R ts -> R $ map convs ts S ts -> S $ map convs ts FV ts -> FV $ map convs ts P u v -> P (convs u) (convs v) _ -> t where convs = convertStringsInTerm conv