module PGF.Data (module PGF.Data, module PGF.Expr, module PGF.Type) where import PGF.CId import PGF.Expr hiding (Value, Env) import PGF.Type import qualified Data.Map as Map import qualified Data.Set as Set import qualified Data.IntMap as IntMap import Data.List import Data.Array import Data.Array.Unboxed -- internal datatypes for PGF -- | An abstract data type representing multilingual grammar -- in Portable Grammar Format. data PGF = PGF { absname :: CId , cncnames :: [CId] , gflags :: Map.Map CId String, -- value of a global flag abstract :: Abstr , concretes :: Map.Map CId Concr } data Abstr = Abstr { aflags :: Map.Map CId String, -- value of a flag funs :: Map.Map CId (Type,Expr), -- type and def of a fun cats :: Map.Map CId [Hypo], -- context of a cat catfuns :: Map.Map CId [CId] -- funs to a cat (redundant, for fast lookup) } data Concr = Concr { cflags :: Map.Map CId String, -- value of a flag lins :: Map.Map CId Term, -- lin of a fun opers :: Map.Map CId Term, -- oper generated by subex elim lincats :: Map.Map CId Term, -- lin type of a cat lindefs :: Map.Map CId Term, -- lin default of a cat printnames :: Map.Map CId Term, -- printname of a cat or a fun paramlincats :: Map.Map CId Term, -- lin type of cat, with printable param names parser :: Maybe ParserInfo -- parser } data Term = R [Term] | P Term Term | S [Term] | K Tokn | V Int | C Int | F CId | FV [Term] | W String Term | TM String deriving (Eq,Ord,Show) data Tokn = KS String | KP [String] [Alternative] deriving (Eq,Ord,Show) data Alternative = Alt [String] [String] deriving (Eq,Ord,Show) type FCat = Int type FIndex = Int type FPointPos = Int data FSymbol = FSymCat {-# UNPACK #-} !Int {-# UNPACK #-} !FIndex | FSymLit {-# UNPACK #-} !Int {-# UNPACK #-} !FIndex | FSymTok Tokn deriving (Eq,Ord,Show) type Profile = [Int] data Production = FApply {-# UNPACK #-} !FunId [FCat] | FCoerce {-# UNPACK #-} !FCat | FConst Tree String deriving (Eq,Ord,Show) data FFun = FFun CId [Profile] {-# UNPACK #-} !(UArray FIndex SeqId) deriving (Eq,Ord,Show) type FSeq = Array FPointPos FSymbol type FunId = Int type SeqId = Int data ParserInfo = ParserInfo { functions :: Array FunId FFun , sequences :: Array SeqId FSeq , productions :: IntMap.IntMap (Set.Set Production) , startCats :: Map.Map CId [FCat] , totalCats :: {-# UNPACK #-} !FCat } fcatString, fcatInt, fcatFloat, fcatVar :: Int fcatString = (-1) fcatInt = (-2) fcatFloat = (-3) fcatVar = (-4) -- print statistics statGFCC :: PGF -> String statGFCC pgf = unlines [ "Abstract\t" ++ prCId (absname pgf), "Concretes\t" ++ unwords (map prCId (cncnames pgf)), "Categories\t" ++ unwords (map prCId (Map.keys (cats (abstract pgf)))) ] -- merge two GFCCs; fails is differens absnames; priority to second arg unionPGF :: PGF -> PGF -> PGF unionPGF one two = case absname one of n | n == wildCId -> two -- extending empty grammar | n == absname two -> one { -- extending grammar with same abstract concretes = Map.union (concretes two) (concretes one), cncnames = union (cncnames one) (cncnames two) } _ -> one -- abstracts don't match ---- print error msg emptyPGF :: PGF emptyPGF = PGF { absname = wildCId, cncnames = [] , gflags = Map.empty, abstract = error "empty grammar, no abstract", concretes = Map.empty }