module PGF.Data where import PGF.CId import GF.Text.UTF8 import qualified Data.Map as Map import qualified Data.Set as Set import qualified Data.IntMap as IntMap import Data.List import Data.Array import Data.Array.Unboxed -- internal datatypes for PGF -- | An abstract data type representing multilingual grammar -- in Portable Grammar Format. data PGF = PGF { absname :: CId , cncnames :: [CId] , gflags :: Map.Map CId String, -- value of a global flag abstract :: Abstr , concretes :: Map.Map CId Concr } data Abstr = Abstr { aflags :: Map.Map CId String, -- value of a flag funs :: Map.Map CId (Type,Expr), -- type and def of a fun cats :: Map.Map CId [Hypo], -- context of a cat catfuns :: Map.Map CId [CId] -- funs to a cat (redundant, for fast lookup) } data Concr = Concr { cflags :: Map.Map CId String, -- value of a flag lins :: Map.Map CId Term, -- lin of a fun opers :: Map.Map CId Term, -- oper generated by subex elim lincats :: Map.Map CId Term, -- lin type of a cat lindefs :: Map.Map CId Term, -- lin default of a cat printnames :: Map.Map CId Term, -- printname of a cat or a fun paramlincats :: Map.Map CId Term, -- lin type of cat, with printable param names parser :: Maybe ParserInfo -- parser } data Type = DTyp [Hypo] CId [Expr] deriving (Eq,Ord,Show) data Literal = LStr String -- ^ string constant | LInt Integer -- ^ integer constant | LFlt Double -- ^ floating point constant deriving (Eq,Ord,Show) -- | The tree is an evaluated expression in the abstract syntax -- of the grammar. The type is especially restricted to not -- allow unapplied lambda abstractions. The tree is used directly -- from the linearizer and is produced directly from the parser. data Tree = Abs [CId] Tree -- ^ lambda abstraction. The list of variables is non-empty | Var CId -- ^ variable | Fun CId [Tree] -- ^ function application | Lit Literal -- ^ literal | Meta Int -- ^ meta variable deriving (Show, Eq, Ord) -- | An expression represents a potentially unevaluated expression -- in the abstract syntax of the grammar. It can be evaluated with -- the 'expr2tree' function and then linearized or it can be used -- directly in the dependent types. data Expr = EAbs CId Expr -- ^ lambda abstraction | EApp Expr Expr -- ^ application | ELit Literal -- ^ literal | EMeta Int -- ^ meta variable | EVar CId -- ^ variable or function reference | EEq [Equation] -- ^ lambda function defined as a set of equations with pattern matching | EPi CId Expr Expr -- ^ dependent function type deriving (Eq,Ord,Show) data Term = R [Term] | P Term Term | S [Term] | K Tokn | V Int | C Int | F CId | FV [Term] | W String Term | TM String deriving (Eq,Ord,Show) data Tokn = KS String | KP [String] [Alternative] deriving (Eq,Ord,Show) data Alternative = Alt [String] [String] deriving (Eq,Ord,Show) data Hypo = Hyp CId Type deriving (Eq,Ord,Show) -- | The equation is used to define lambda function as a sequence -- of equations with pattern matching. The list of 'Expr' represents -- the patterns and the second 'Expr' is the function body for this -- equation. data Equation = Equ [Expr] Expr deriving (Eq,Ord,Show) type FCat = Int type FIndex = Int type FPointPos = Int data FSymbol = FSymCat {-# UNPACK #-} !Int {-# UNPACK #-} !FIndex | FSymTok Tokn deriving (Eq,Ord,Show) type Profile = [Int] data Production = FApply {-# UNPACK #-} !FunId [FCat] | FCoerce {-# UNPACK #-} !FCat | FLit Literal String deriving (Eq,Ord,Show) data FFun = FFun CId [Profile] {-# UNPACK #-} !(UArray FIndex SeqId) deriving (Eq,Ord,Show) type FSeq = Array FPointPos FSymbol type FunId = Int type SeqId = Int data ParserInfo = ParserInfo { functions :: Array FunId FFun , sequences :: Array SeqId FSeq , productions :: IntMap.IntMap (Set.Set Production) , startCats :: Map.Map CId [FCat] , totalCats :: {-# UNPACK #-} !FCat } fcatString, fcatInt, fcatFloat, fcatVar :: Int fcatString = (-1) fcatInt = (-2) fcatFloat = (-3) fcatVar = (-4) -- print statistics statGFCC :: PGF -> String statGFCC pgf = unlines [ "Abstract\t" ++ prCId (absname pgf), "Concretes\t" ++ unwords (map prCId (cncnames pgf)), "Categories\t" ++ unwords (map prCId (Map.keys (cats (abstract pgf)))) ] -- merge two GFCCs; fails is differens absnames; priority to second arg unionPGF :: PGF -> PGF -> PGF unionPGF one two = case absname one of n | n == wildCId -> two -- extending empty grammar | n == absname two -> one { -- extending grammar with same abstract concretes = Map.union (concretes two) (concretes one), cncnames = union (cncnames one) (cncnames two) } _ -> one -- abstracts don't match ---- print error msg emptyPGF :: PGF emptyPGF = PGF { absname = wildCId, cncnames = [] , gflags = Map.empty, abstract = error "empty grammar, no abstract", concretes = Map.empty }