Files
gf-core/src-3.0/PGF/Data.hs

185 lines
5.2 KiB
Haskell

module PGF.Data where
import PGF.CId
import GF.Text.UTF8
import GF.Data.Assoc
import qualified Data.Map as Map
import Data.List
import Data.Array
-- internal datatypes for PGF
-- | An abstract data type representing multilingual grammar
-- in Portable Grammar Format.
data PGF = PGF {
absname :: CId ,
cncnames :: [CId] ,
gflags :: Map.Map CId String, -- value of a global flag
abstract :: Abstr ,
concretes :: Map.Map CId Concr
}
data Abstr = Abstr {
aflags :: Map.Map CId String, -- value of a flag
funs :: Map.Map CId (Type,Exp), -- type and def of a fun
cats :: Map.Map CId [Hypo], -- context of a cat
catfuns :: Map.Map CId [CId] -- funs to a cat (redundant, for fast lookup)
}
data Concr = Concr {
cflags :: Map.Map CId String, -- value of a flag
lins :: Map.Map CId Term, -- lin of a fun
opers :: Map.Map CId Term, -- oper generated by subex elim
lincats :: Map.Map CId Term, -- lin type of a cat
lindefs :: Map.Map CId Term, -- lin default of a cat
printnames :: Map.Map CId Term, -- printname of a cat or a fun
paramlincats :: Map.Map CId Term, -- lin type of cat, with printable param names
parser :: Maybe ParserInfo -- parser
}
data Type =
DTyp [Hypo] CId [Exp]
deriving (Eq,Ord,Show)
-- | An expression representing the abstract syntax tree
-- in PGF. The same expression is used in the dependent
-- types.
data Exp =
EAbs [CId] Exp -- ^ lambda abstraction. The list should contain at least one variable
| EApp CId [Exp] -- ^ application. Note that unevaluated lambda abstractions are not allowed
| EStr String -- ^ string constant
| EInt Integer -- ^ integer constant
| EFloat Double -- ^ floating point constant
| EMeta Int -- ^ meta variable
| EVar CId -- ^ variable reference
| EEq [Equation] -- ^ lambda function defined as a set of equations with pattern matching
deriving (Eq,Ord,Show)
data Term =
R [Term]
| P Term Term
| S [Term]
| K Tokn
| V Int
| C Int
| F CId
| FV [Term]
| W String Term
| TM String
deriving (Eq,Ord,Show)
data Tokn =
KS String
| KP [String] [Variant]
deriving (Eq,Ord,Show)
data Variant =
Var [String] [String]
deriving (Eq,Ord,Show)
data Hypo =
Hyp CId Type
deriving (Eq,Ord,Show)
-- | The equation is used to define lambda function as a sequence
-- of equations with pattern matching. The list of 'Exp' represents
-- the patterns and the second 'Exp' is the function body for this
-- equation.
data Equation =
Equ [Exp] Exp
deriving (Eq,Ord,Show)
type FToken = String
type FCat = Int
type FIndex = Int
data FSymbol
= FSymCat {-# UNPACK #-} !FIndex {-# UNPACK #-} !Int
| FSymTok FToken
type Profile = [Int]
type FPointPos = Int
type FGrammar = ([FRule], Map.Map CId [FCat])
data FRule = FRule CId [Profile] [FCat] FCat (Array FIndex (Array FPointPos FSymbol))
type RuleId = Int
data ParserInfo
= ParserInfo { allRules :: Array RuleId FRule
, topdownRules :: Assoc FCat [RuleId]
-- ^ used in 'GF.Parsing.MCFG.Active' (Earley):
-- , emptyRules :: [RuleId]
, epsilonRules :: [RuleId]
-- ^ used in 'GF.Parsing.MCFG.Active' (Kilbury):
, leftcornerCats :: Assoc FCat [RuleId]
, leftcornerTokens :: Assoc FToken [RuleId]
-- ^ used in 'GF.Parsing.MCFG.Active' (Kilbury):
, grammarCats :: [FCat]
, grammarToks :: [FToken]
, startupCats :: Map.Map CId [FCat]
}
fcatString, fcatInt, fcatFloat, fcatVar :: Int
fcatString = (-1)
fcatInt = (-2)
fcatFloat = (-3)
fcatVar = (-4)
-- print statistics
statGFCC :: PGF -> String
statGFCC pgf = unlines [
"Abstract\t" ++ prCId (absname pgf),
"Concretes\t" ++ unwords (map prCId (cncnames pgf)),
"Categories\t" ++ unwords (map prCId (Map.keys (cats (abstract pgf))))
]
-- merge two GFCCs; fails is differens absnames; priority to second arg
unionPGF :: PGF -> PGF -> PGF
unionPGF one two = case absname one of
n | n == wildCId -> two -- extending empty grammar
| n == absname two -> one { -- extending grammar with same abstract
concretes = Map.union (concretes two) (concretes one),
cncnames = union (cncnames two) (cncnames one)
}
_ -> one -- abstracts don't match ---- print error msg
emptyPGF :: PGF
emptyPGF = PGF {
absname = wildCId,
cncnames = [] ,
gflags = Map.empty,
abstract = error "empty grammar, no abstract",
concretes = Map.empty
}
-- encode idenfifiers and strings in UTF8
utf8GFCC :: PGF -> PGF
utf8GFCC pgf = pgf {
concretes = Map.map u8concr (concretes pgf)
}
where
u8concr cnc = cnc {
lins = Map.map u8term (lins cnc),
opers = Map.map u8term (opers cnc)
}
u8term = convertStringsInTerm encodeUTF8
---- TODO: convert identifiers and flags
convertStringsInTerm conv t = case t of
K (KS s) -> K (KS (conv s))
W s r -> W (conv s) (convs r)
R ts -> R $ map convs ts
S ts -> S $ map convs ts
FV ts -> FV $ map convs ts
P u v -> P (convs u) (convs v)
_ -> t
where
convs = convertStringsInTerm conv