forked from GitHub/gf-core
176 lines
5.3 KiB
Haskell
176 lines
5.3 KiB
Haskell
module PGF.Data where
|
|
|
|
import PGF.CId
|
|
import GF.Text.UTF8
|
|
|
|
import qualified Data.Map as Map
|
|
import qualified Data.Set as Set
|
|
import qualified Data.IntMap as IntMap
|
|
import Data.List
|
|
import Data.Array
|
|
import Data.Array.Unboxed
|
|
|
|
-- internal datatypes for PGF
|
|
|
|
-- | An abstract data type representing multilingual grammar
|
|
-- in Portable Grammar Format.
|
|
data PGF = PGF {
|
|
absname :: CId ,
|
|
cncnames :: [CId] ,
|
|
gflags :: Map.Map CId String, -- value of a global flag
|
|
abstract :: Abstr ,
|
|
concretes :: Map.Map CId Concr
|
|
}
|
|
|
|
data Abstr = Abstr {
|
|
aflags :: Map.Map CId String, -- value of a flag
|
|
funs :: Map.Map CId (Type,Expr), -- type and def of a fun
|
|
cats :: Map.Map CId [Hypo], -- context of a cat
|
|
catfuns :: Map.Map CId [CId] -- funs to a cat (redundant, for fast lookup)
|
|
}
|
|
|
|
data Concr = Concr {
|
|
cflags :: Map.Map CId String, -- value of a flag
|
|
lins :: Map.Map CId Term, -- lin of a fun
|
|
opers :: Map.Map CId Term, -- oper generated by subex elim
|
|
lincats :: Map.Map CId Term, -- lin type of a cat
|
|
lindefs :: Map.Map CId Term, -- lin default of a cat
|
|
printnames :: Map.Map CId Term, -- printname of a cat or a fun
|
|
paramlincats :: Map.Map CId Term, -- lin type of cat, with printable param names
|
|
parser :: Maybe ParserInfo -- parser
|
|
}
|
|
|
|
data Type =
|
|
DTyp [Hypo] CId [Expr]
|
|
deriving (Eq,Ord,Show)
|
|
|
|
data Literal =
|
|
LStr String -- ^ string constant
|
|
| LInt Integer -- ^ integer constant
|
|
| LFlt Double -- ^ floating point constant
|
|
deriving (Eq,Ord,Show)
|
|
|
|
-- | The tree is an evaluated expression in the abstract syntax
|
|
-- of the grammar. The type is especially restricted to not
|
|
-- allow unapplied lambda abstractions. The tree is used directly
|
|
-- from the linearizer and is produced directly from the parser.
|
|
data Tree =
|
|
Abs [CId] Tree -- ^ lambda abstraction. The list of variables is non-empty
|
|
| Var CId -- ^ variable
|
|
| Fun CId [Tree] -- ^ function application
|
|
| Lit Literal -- ^ literal
|
|
| Meta Int -- ^ meta variable
|
|
deriving (Show, Eq, Ord)
|
|
|
|
-- | An expression represents a potentially unevaluated expression
|
|
-- in the abstract syntax of the grammar. It can be evaluated with
|
|
-- the 'expr2tree' function and then linearized or it can be used
|
|
-- directly in the dependent types.
|
|
data Expr =
|
|
EAbs CId Expr -- ^ lambda abstraction
|
|
| EApp Expr Expr -- ^ application
|
|
| ELit Literal -- ^ literal
|
|
| EMeta Int -- ^ meta variable
|
|
| EVar CId -- ^ variable or function reference
|
|
| EEq [Equation] -- ^ lambda function defined as a set of equations with pattern matching
|
|
| EPi CId Expr Expr -- ^ dependent function type
|
|
deriving (Eq,Ord,Show)
|
|
|
|
data Term =
|
|
R [Term]
|
|
| P Term Term
|
|
| S [Term]
|
|
| K Tokn
|
|
| V Int
|
|
| C Int
|
|
| F CId
|
|
| FV [Term]
|
|
| W String Term
|
|
| TM String
|
|
deriving (Eq,Ord,Show)
|
|
|
|
data Tokn =
|
|
KS String
|
|
| KP [String] [Alternative]
|
|
deriving (Eq,Ord,Show)
|
|
|
|
data Alternative =
|
|
Alt [String] [String]
|
|
deriving (Eq,Ord,Show)
|
|
|
|
data Hypo =
|
|
Hyp CId Type
|
|
deriving (Eq,Ord,Show)
|
|
|
|
-- | The equation is used to define lambda function as a sequence
|
|
-- of equations with pattern matching. The list of 'Expr' represents
|
|
-- the patterns and the second 'Expr' is the function body for this
|
|
-- equation.
|
|
data Equation =
|
|
Equ [Expr] Expr
|
|
deriving (Eq,Ord,Show)
|
|
|
|
|
|
type FCat = Int
|
|
type FIndex = Int
|
|
type FPointPos = Int
|
|
data FSymbol
|
|
= FSymCat {-# UNPACK #-} !Int {-# UNPACK #-} !FIndex
|
|
| FSymTok Tokn
|
|
deriving (Eq,Ord,Show)
|
|
type Profile = [Int]
|
|
data Production
|
|
= FApply {-# UNPACK #-} !FunId [FCat]
|
|
| FCoerce {-# UNPACK #-} !FCat
|
|
| FLit Literal String
|
|
deriving (Eq,Ord,Show)
|
|
data FFun = FFun CId [Profile] {-# UNPACK #-} !(UArray FIndex SeqId) deriving (Eq,Ord,Show)
|
|
type FSeq = Array FPointPos FSymbol
|
|
type FunId = Int
|
|
type SeqId = Int
|
|
|
|
data ParserInfo
|
|
= ParserInfo { functions :: Array FunId FFun
|
|
, sequences :: Array SeqId FSeq
|
|
, productions :: IntMap.IntMap (Set.Set Production)
|
|
, startCats :: Map.Map CId [FCat]
|
|
, totalCats :: {-# UNPACK #-} !FCat
|
|
}
|
|
|
|
|
|
fcatString, fcatInt, fcatFloat, fcatVar :: Int
|
|
fcatString = (-1)
|
|
fcatInt = (-2)
|
|
fcatFloat = (-3)
|
|
fcatVar = (-4)
|
|
|
|
|
|
-- print statistics
|
|
|
|
statGFCC :: PGF -> String
|
|
statGFCC pgf = unlines [
|
|
"Abstract\t" ++ prCId (absname pgf),
|
|
"Concretes\t" ++ unwords (map prCId (cncnames pgf)),
|
|
"Categories\t" ++ unwords (map prCId (Map.keys (cats (abstract pgf))))
|
|
]
|
|
|
|
-- merge two GFCCs; fails is differens absnames; priority to second arg
|
|
|
|
unionPGF :: PGF -> PGF -> PGF
|
|
unionPGF one two = case absname one of
|
|
n | n == wildCId -> two -- extending empty grammar
|
|
| n == absname two -> one { -- extending grammar with same abstract
|
|
concretes = Map.union (concretes two) (concretes one),
|
|
cncnames = union (cncnames one) (cncnames two)
|
|
}
|
|
_ -> one -- abstracts don't match ---- print error msg
|
|
|
|
emptyPGF :: PGF
|
|
emptyPGF = PGF {
|
|
absname = wildCId,
|
|
cncnames = [] ,
|
|
gflags = Map.empty,
|
|
abstract = error "empty grammar, no abstract",
|
|
concretes = Map.empty
|
|
}
|