module PGF.Data where import PGF.CId import GF.Text.UTF8 import GF.Data.Assoc import qualified Data.Map as Map import Data.List import Data.Array -- internal datatypes for PGF -- | An abstract data type representing multilingual grammar -- in Portable Grammar Format. data PGF = PGF { absname :: CId , cncnames :: [CId] , gflags :: Map.Map CId String, -- value of a global flag abstract :: Abstr , concretes :: Map.Map CId Concr } data Abstr = Abstr { aflags :: Map.Map CId String, -- value of a flag funs :: Map.Map CId (Type,Exp), -- type and def of a fun cats :: Map.Map CId [Hypo], -- context of a cat catfuns :: Map.Map CId [CId] -- funs to a cat (redundant, for fast lookup) } data Concr = Concr { cflags :: Map.Map CId String, -- value of a flag lins :: Map.Map CId Term, -- lin of a fun opers :: Map.Map CId Term, -- oper generated by subex elim lincats :: Map.Map CId Term, -- lin type of a cat lindefs :: Map.Map CId Term, -- lin default of a cat printnames :: Map.Map CId Term, -- printname of a cat or a fun paramlincats :: Map.Map CId Term, -- lin type of cat, with printable param names parser :: Maybe ParserInfo -- parser } data Type = DTyp [Hypo] CId [Exp] deriving (Eq,Ord,Show) -- | An expression representing the abstract syntax tree -- in PGF. The same expression is used in the dependent -- types. data Exp = EAbs [CId] Exp -- ^ lambda abstraction. The list should contain at least one variable | EApp CId [Exp] -- ^ application. Note that unevaluated lambda abstractions are not allowed | EStr String -- ^ string constant | EInt Integer -- ^ integer constant | EFloat Double -- ^ floating point constant | EMeta Int -- ^ meta variable | EVar CId -- ^ variable reference | EEq [Equation] -- ^ lambda function defined as a set of equations with pattern matching deriving (Eq,Ord,Show) data Term = R [Term] | P Term Term | S [Term] | K Tokn | V Int | C Int | F CId | FV [Term] | W String Term | TM String deriving (Eq,Ord,Show) data Tokn = KS String | KP [String] [Variant] deriving (Eq,Ord,Show) data Variant = Var [String] [String] deriving (Eq,Ord,Show) data Hypo = Hyp CId Type deriving (Eq,Ord,Show) -- | The equation is used to define lambda function as a sequence -- of equations with pattern matching. The list of 'Exp' represents -- the patterns and the second 'Exp' is the function body for this -- equation. data Equation = Equ [Exp] Exp deriving (Eq,Ord,Show) type FToken = String type FCat = Int type FIndex = Int data FSymbol = FSymCat {-# UNPACK #-} !FIndex {-# UNPACK #-} !Int | FSymTok FToken type Profile = [Int] type FPointPos = Int type FGrammar = ([FRule], Map.Map CId [FCat]) data FRule = FRule CId [Profile] [FCat] FCat (Array FIndex (Array FPointPos FSymbol)) type RuleId = Int data ParserInfo = ParserInfo { allRules :: Array RuleId FRule , topdownRules :: Assoc FCat [RuleId] -- ^ used in 'GF.Parsing.MCFG.Active' (Earley): -- , emptyRules :: [RuleId] , epsilonRules :: [RuleId] -- ^ used in 'GF.Parsing.MCFG.Active' (Kilbury): , leftcornerCats :: Assoc FCat [RuleId] , leftcornerTokens :: Assoc FToken [RuleId] -- ^ used in 'GF.Parsing.MCFG.Active' (Kilbury): , grammarCats :: [FCat] , grammarToks :: [FToken] , startupCats :: Map.Map CId [FCat] } fcatString, fcatInt, fcatFloat, fcatVar :: Int fcatString = (-1) fcatInt = (-2) fcatFloat = (-3) fcatVar = (-4) -- print statistics statGFCC :: PGF -> String statGFCC pgf = unlines [ "Abstract\t" ++ prCId (absname pgf), "Concretes\t" ++ unwords (map prCId (cncnames pgf)), "Categories\t" ++ unwords (map prCId (Map.keys (cats (abstract pgf)))) ] -- merge two GFCCs; fails is differens absnames; priority to second arg unionPGF :: PGF -> PGF -> PGF unionPGF one two = case absname one of n | n == wildCId -> two -- extending empty grammar | n == absname two -> one { -- extending grammar with same abstract concretes = Map.union (concretes two) (concretes one), cncnames = union (cncnames two) (cncnames one) } _ -> one -- abstracts don't match ---- print error msg emptyPGF :: PGF emptyPGF = PGF { absname = wildCId, cncnames = [] , gflags = Map.empty, abstract = error "empty grammar, no abstract", concretes = Map.empty } -- encode idenfifiers and strings in UTF8 utf8GFCC :: PGF -> PGF utf8GFCC pgf = pgf { concretes = Map.map u8concr (concretes pgf) } where u8concr cnc = cnc { lins = Map.map u8term (lins cnc), opers = Map.map u8term (opers cnc) } u8term = convertStringsInTerm encodeUTF8 ---- TODO: convert identifiers and flags convertStringsInTerm conv t = case t of K (KS s) -> K (KS (conv s)) W s r -> W (conv s) (convs r) R ts -> R $ map convs ts S ts -> S $ map convs ts FV ts -> FV $ map convs ts P u v -> P (convs u) (convs v) _ -> t where convs = convertStringsInTerm conv