mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-05-10 19:52:50 -06:00
130 lines
3.5 KiB
Haskell
130 lines
3.5 KiB
Haskell
module PGF.Data (module PGF.Data, module PGF.Expr, module PGF.Type) where
|
|
|
|
import PGF.CId
|
|
import PGF.Expr hiding (Value, Env)
|
|
import PGF.Type
|
|
|
|
import qualified Data.Map as Map
|
|
import qualified Data.Set as Set
|
|
import qualified Data.IntMap as IntMap
|
|
import Data.List
|
|
import Data.Array
|
|
import Data.Array.Unboxed
|
|
|
|
-- internal datatypes for PGF
|
|
|
|
-- | An abstract data type representing multilingual grammar
|
|
-- in Portable Grammar Format.
|
|
data PGF = PGF {
|
|
absname :: CId ,
|
|
cncnames :: [CId] ,
|
|
gflags :: Map.Map CId String, -- value of a global flag
|
|
abstract :: Abstr ,
|
|
concretes :: Map.Map CId Concr
|
|
}
|
|
|
|
data Abstr = Abstr {
|
|
aflags :: Map.Map CId String, -- value of a flag
|
|
funs :: Map.Map CId (Type,Expr), -- type and def of a fun
|
|
cats :: Map.Map CId [Hypo], -- context of a cat
|
|
catfuns :: Map.Map CId [CId] -- funs to a cat (redundant, for fast lookup)
|
|
}
|
|
|
|
data Concr = Concr {
|
|
cflags :: Map.Map CId String, -- value of a flag
|
|
lins :: Map.Map CId Term, -- lin of a fun
|
|
opers :: Map.Map CId Term, -- oper generated by subex elim
|
|
lincats :: Map.Map CId Term, -- lin type of a cat
|
|
lindefs :: Map.Map CId Term, -- lin default of a cat
|
|
printnames :: Map.Map CId Term, -- printname of a cat or a fun
|
|
paramlincats :: Map.Map CId Term, -- lin type of cat, with printable param names
|
|
parser :: Maybe ParserInfo -- parser
|
|
}
|
|
|
|
data Term =
|
|
R [Term]
|
|
| P Term Term
|
|
| S [Term]
|
|
| K Tokn
|
|
| V Int
|
|
| C Int
|
|
| F CId
|
|
| FV [Term]
|
|
| W String Term
|
|
| TM String
|
|
deriving (Eq,Ord,Show)
|
|
|
|
data Tokn =
|
|
KS String
|
|
| KP [String] [Alternative]
|
|
deriving (Eq,Ord,Show)
|
|
|
|
data Alternative =
|
|
Alt [String] [String]
|
|
deriving (Eq,Ord,Show)
|
|
|
|
|
|
type FCat = Int
|
|
type FIndex = Int
|
|
type FPointPos = Int
|
|
data FSymbol
|
|
= FSymCat {-# UNPACK #-} !Int {-# UNPACK #-} !FIndex
|
|
| FSymLit {-# UNPACK #-} !Int {-# UNPACK #-} !FIndex
|
|
| FSymTok Tokn
|
|
deriving (Eq,Ord,Show)
|
|
type Profile = [Int]
|
|
data Production
|
|
= FApply {-# UNPACK #-} !FunId [FCat]
|
|
| FCoerce {-# UNPACK #-} !FCat
|
|
| FConst Tree String
|
|
deriving (Eq,Ord,Show)
|
|
data FFun = FFun CId [Profile] {-# UNPACK #-} !(UArray FIndex SeqId) deriving (Eq,Ord,Show)
|
|
type FSeq = Array FPointPos FSymbol
|
|
type FunId = Int
|
|
type SeqId = Int
|
|
|
|
data ParserInfo
|
|
= ParserInfo { functions :: Array FunId FFun
|
|
, sequences :: Array SeqId FSeq
|
|
, productions :: IntMap.IntMap (Set.Set Production)
|
|
, startCats :: Map.Map CId [FCat]
|
|
, totalCats :: {-# UNPACK #-} !FCat
|
|
}
|
|
|
|
|
|
fcatString, fcatInt, fcatFloat, fcatVar :: Int
|
|
fcatString = (-1)
|
|
fcatInt = (-2)
|
|
fcatFloat = (-3)
|
|
fcatVar = (-4)
|
|
|
|
|
|
-- print statistics
|
|
|
|
statGFCC :: PGF -> String
|
|
statGFCC pgf = unlines [
|
|
"Abstract\t" ++ prCId (absname pgf),
|
|
"Concretes\t" ++ unwords (map prCId (cncnames pgf)),
|
|
"Categories\t" ++ unwords (map prCId (Map.keys (cats (abstract pgf))))
|
|
]
|
|
|
|
-- merge two GFCCs; fails is differens absnames; priority to second arg
|
|
|
|
unionPGF :: PGF -> PGF -> PGF
|
|
unionPGF one two = case absname one of
|
|
n | n == wildCId -> two -- extending empty grammar
|
|
| n == absname two -> one { -- extending grammar with same abstract
|
|
concretes = Map.union (concretes two) (concretes one),
|
|
cncnames = union (cncnames one) (cncnames two)
|
|
}
|
|
_ -> one -- abstracts don't match ---- print error msg
|
|
|
|
emptyPGF :: PGF
|
|
emptyPGF = PGF {
|
|
absname = wildCId,
|
|
cncnames = [] ,
|
|
gflags = Map.empty,
|
|
abstract = error "empty grammar, no abstract",
|
|
concretes = Map.empty
|
|
}
|