1
0
forked from GitHub/gf-core
Files
gf-core/src/PGF/Data.hs

176 lines
5.3 KiB
Haskell

module PGF.Data where
import PGF.CId
import GF.Text.UTF8
import qualified Data.Map as Map
import qualified Data.Set as Set
import qualified Data.IntMap as IntMap
import Data.List
import Data.Array
import Data.Array.Unboxed
-- internal datatypes for PGF
-- | An abstract data type representing multilingual grammar
-- in Portable Grammar Format.
data PGF = PGF {
absname :: CId ,
cncnames :: [CId] ,
gflags :: Map.Map CId String, -- value of a global flag
abstract :: Abstr ,
concretes :: Map.Map CId Concr
}
data Abstr = Abstr {
aflags :: Map.Map CId String, -- value of a flag
funs :: Map.Map CId (Type,Expr), -- type and def of a fun
cats :: Map.Map CId [Hypo], -- context of a cat
catfuns :: Map.Map CId [CId] -- funs to a cat (redundant, for fast lookup)
}
data Concr = Concr {
cflags :: Map.Map CId String, -- value of a flag
lins :: Map.Map CId Term, -- lin of a fun
opers :: Map.Map CId Term, -- oper generated by subex elim
lincats :: Map.Map CId Term, -- lin type of a cat
lindefs :: Map.Map CId Term, -- lin default of a cat
printnames :: Map.Map CId Term, -- printname of a cat or a fun
paramlincats :: Map.Map CId Term, -- lin type of cat, with printable param names
parser :: Maybe ParserInfo -- parser
}
data Type =
DTyp [Hypo] CId [Expr]
deriving (Eq,Ord,Show)
data Literal =
LStr String -- ^ string constant
| LInt Integer -- ^ integer constant
| LFlt Double -- ^ floating point constant
deriving (Eq,Ord,Show)
-- | The tree is an evaluated expression in the abstract syntax
-- of the grammar. The type is especially restricted to not
-- allow unapplied lambda abstractions. The tree is used directly
-- from the linearizer and is produced directly from the parser.
data Tree =
Abs [CId] Tree -- ^ lambda abstraction. The list of variables is non-empty
| Var CId -- ^ variable
| Fun CId [Tree] -- ^ function application
| Lit Literal -- ^ literal
| Meta Int -- ^ meta variable
deriving (Show, Eq, Ord)
-- | An expression represents a potentially unevaluated expression
-- in the abstract syntax of the grammar. It can be evaluated with
-- the 'expr2tree' function and then linearized or it can be used
-- directly in the dependent types.
data Expr =
EAbs CId Expr -- ^ lambda abstraction
| EApp Expr Expr -- ^ application
| ELit Literal -- ^ literal
| EMeta Int -- ^ meta variable
| EVar CId -- ^ variable or function reference
| EEq [Equation] -- ^ lambda function defined as a set of equations with pattern matching
| EPi CId Expr Expr -- ^ dependent function type
deriving (Eq,Ord,Show)
data Term =
R [Term]
| P Term Term
| S [Term]
| K Tokn
| V Int
| C Int
| F CId
| FV [Term]
| W String Term
| TM String
deriving (Eq,Ord,Show)
data Tokn =
KS String
| KP [String] [Alternative]
deriving (Eq,Ord,Show)
data Alternative =
Alt [String] [String]
deriving (Eq,Ord,Show)
data Hypo =
Hyp CId Type
deriving (Eq,Ord,Show)
-- | The equation is used to define lambda function as a sequence
-- of equations with pattern matching. The list of 'Expr' represents
-- the patterns and the second 'Expr' is the function body for this
-- equation.
data Equation =
Equ [Expr] Expr
deriving (Eq,Ord,Show)
type FCat = Int
type FIndex = Int
type FPointPos = Int
data FSymbol
= FSymCat {-# UNPACK #-} !Int {-# UNPACK #-} !FIndex
| FSymTok Tokn
deriving (Eq,Ord,Show)
type Profile = [Int]
data Production
= FApply {-# UNPACK #-} !FunId [FCat]
| FCoerce {-# UNPACK #-} !FCat
| FLit Literal String
deriving (Eq,Ord,Show)
data FFun = FFun CId [Profile] {-# UNPACK #-} !(UArray FIndex SeqId) deriving (Eq,Ord,Show)
type FSeq = Array FPointPos FSymbol
type FunId = Int
type SeqId = Int
data ParserInfo
= ParserInfo { functions :: Array FunId FFun
, sequences :: Array SeqId FSeq
, productions :: IntMap.IntMap (Set.Set Production)
, startCats :: Map.Map CId [FCat]
, totalCats :: {-# UNPACK #-} !FCat
}
fcatString, fcatInt, fcatFloat, fcatVar :: Int
fcatString = (-1)
fcatInt = (-2)
fcatFloat = (-3)
fcatVar = (-4)
-- print statistics
statGFCC :: PGF -> String
statGFCC pgf = unlines [
"Abstract\t" ++ prCId (absname pgf),
"Concretes\t" ++ unwords (map prCId (cncnames pgf)),
"Categories\t" ++ unwords (map prCId (Map.keys (cats (abstract pgf))))
]
-- merge two GFCCs; fails is differens absnames; priority to second arg
unionPGF :: PGF -> PGF -> PGF
unionPGF one two = case absname one of
n | n == wildCId -> two -- extending empty grammar
| n == absname two -> one { -- extending grammar with same abstract
concretes = Map.union (concretes two) (concretes one),
cncnames = union (cncnames one) (cncnames two)
}
_ -> one -- abstracts don't match ---- print error msg
emptyPGF :: PGF
emptyPGF = PGF {
absname = wildCId,
cncnames = [] ,
gflags = Map.empty,
abstract = error "empty grammar, no abstract",
concretes = Map.empty
}