mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-05-06 09:42:50 -06:00
+ References to modules under src/compiler have been eliminated from the PGF library (under src/runtime/haskell). Only two functions had to be moved (from GF.Data.Utilities to PGF.Utilities) to make this possible, other apparent dependencies turned out to be vacuous. + In gf.cabal, the GF executable no longer directly depends on the PGF library source directory, but only on the exposed library modules. This means that there is less duplication in gf.cabal and that the 30 modules in the PGF library will no longer be compiled twice while building GF. To make this possible, additional PGF library modules have been exposed, even though they should probably be considered for internal use only. They could be collected in a PGF.Internal module, or marked as "unstable", to make this explicit. + Also, by using the -fwarn-unused-imports flag, ~220 redundant imports were found and removed, reducing the total number of imports by ~15%.
134 lines
5.0 KiB
Haskell
134 lines
5.0 KiB
Haskell
module PGF.Data (module PGF.Data, module PGF.Expr, module PGF.Type) where
|
|
|
|
import PGF.CId
|
|
import PGF.Expr hiding (Value, Sig, Env, Tree, eval, apply, applyValue, value2expr)
|
|
import PGF.Type
|
|
|
|
import qualified Data.Map as Map
|
|
import qualified Data.Set as Set
|
|
import qualified Data.IntMap as IntMap
|
|
import qualified Data.IntSet as IntSet
|
|
import qualified PGF.TrieMap as TMap
|
|
import qualified Data.ByteString as BS
|
|
import Data.Array.IArray
|
|
import Data.Array.Unboxed
|
|
--import Data.List
|
|
|
|
|
|
-- internal datatypes for PGF
|
|
|
|
-- | An abstract data type representing multilingual grammar
|
|
-- in Portable Grammar Format.
|
|
data PGF = PGF {
|
|
gflags :: Map.Map CId Literal, -- value of a global flag
|
|
absname :: CId ,
|
|
abstract :: Abstr ,
|
|
concretes :: Map.Map CId Concr
|
|
}
|
|
|
|
data Abstr = Abstr {
|
|
aflags :: Map.Map CId Literal, -- ^ value of a flag
|
|
funs :: Map.Map CId (Type,Int,Maybe [Equation],Double,BCAddr), -- ^ type, arrity and definition of function + probability
|
|
cats :: Map.Map CId ([Hypo],[(Double, CId)],BCAddr), -- ^ 1. context of a category
|
|
-- ^ 2. functions of a category. The order in the list is important,
|
|
-- this is the order in which the type singatures are given in the source.
|
|
-- The termination of the exhaustive generation might depend on this.
|
|
code :: BS.ByteString
|
|
}
|
|
|
|
data Concr = Concr {
|
|
cflags :: Map.Map CId Literal, -- value of a flag
|
|
printnames :: Map.Map CId String, -- printname of a cat or a fun
|
|
cncfuns :: Array FunId CncFun,
|
|
lindefs :: IntMap.IntMap [FunId],
|
|
linrefs :: IntMap.IntMap [FunId],
|
|
sequences :: Array SeqId Sequence,
|
|
productions :: IntMap.IntMap (Set.Set Production), -- the original productions loaded from the PGF file
|
|
pproductions :: IntMap.IntMap (Set.Set Production), -- productions needed for parsing
|
|
lproductions :: Map.Map CId (IntMap.IntMap (Set.Set Production)), -- productions needed for linearization
|
|
cnccats :: Map.Map CId CncCat,
|
|
lexicon :: IntMap.IntMap (IntMap.IntMap (TMap.TrieMap Token IntSet.IntSet)),
|
|
totalCats :: {-# UNPACK #-} !FId
|
|
}
|
|
|
|
type Token = String
|
|
type FId = Int
|
|
type LIndex = Int
|
|
type DotPos = Int
|
|
data Symbol
|
|
= SymCat {-# UNPACK #-} !Int {-# UNPACK #-} !LIndex
|
|
| SymLit {-# UNPACK #-} !Int {-# UNPACK #-} !LIndex
|
|
| SymVar {-# UNPACK #-} !Int {-# UNPACK #-} !Int
|
|
| SymKS Token
|
|
| SymKP [Symbol] [([Symbol],[String])]
|
|
| SymBIND -- the special BIND token
|
|
| SymNE -- non exist (this should be last constructor to simplify the binary search in the runtime)
|
|
deriving (Eq,Ord,Show)
|
|
data Production
|
|
= PApply {-# UNPACK #-} !FunId [PArg]
|
|
| PCoerce {-# UNPACK #-} !FId
|
|
| PConst CId Expr [Token]
|
|
deriving (Eq,Ord,Show)
|
|
data PArg = PArg [(FId,FId)] {-# UNPACK #-} !FId deriving (Eq,Ord,Show)
|
|
data CncCat = CncCat {-# UNPACK #-} !FId {-# UNPACK #-} !FId {-# UNPACK #-} !(Array LIndex String)
|
|
data CncFun = CncFun CId {-# UNPACK #-} !(UArray LIndex SeqId) deriving (Eq,Ord,Show)
|
|
type Sequence = Array DotPos Symbol
|
|
type FunId = Int
|
|
type SeqId = Int
|
|
type BCAddr = Int
|
|
|
|
|
|
-- merge two PGFs; fails is differens absnames; priority to second arg
|
|
|
|
unionPGF :: PGF -> PGF -> PGF
|
|
unionPGF one two = fst $ msgUnionPGF one two
|
|
|
|
msgUnionPGF :: PGF -> PGF -> (PGF, Maybe String)
|
|
msgUnionPGF one two = case absname one of
|
|
n | n == wildCId -> (two, Nothing) -- extending empty grammar
|
|
| n == absname two && haveSameFunsPGF one two -> (one { -- extending grammar with same abstract
|
|
concretes = Map.union (concretes two) (concretes one)
|
|
}, Nothing)
|
|
_ -> (two, -- abstracts don't match, discard the old one -- error msg in Importing.ioUnionPGF
|
|
Just "Abstract changed, previous concretes discarded.")
|
|
|
|
emptyPGF :: PGF
|
|
emptyPGF = PGF {
|
|
gflags = Map.empty,
|
|
absname = wildCId,
|
|
abstract = error "empty grammar, no abstract",
|
|
concretes = Map.empty
|
|
}
|
|
|
|
-- sameness of function type signatures, checked when importing a new concrete in env
|
|
haveSameFunsPGF :: PGF -> PGF -> Bool
|
|
haveSameFunsPGF one two =
|
|
let
|
|
fsone = [(f,t) | (f,(t,_,_,_,_)) <- Map.toList (funs (abstract one))]
|
|
fstwo = [(f,t) | (f,(t,_,_,_,_)) <- Map.toList (funs (abstract two))]
|
|
in fsone == fstwo
|
|
|
|
-- | This is just a 'CId' with the language name.
|
|
-- A language name is the identifier that you write in the
|
|
-- top concrete or abstract module in GF after the
|
|
-- concrete/abstract keyword. Example:
|
|
--
|
|
-- > abstract Lang = ...
|
|
-- > concrete LangEng of Lang = ...
|
|
type Language = CId
|
|
|
|
readLanguage :: String -> Maybe Language
|
|
readLanguage = readCId
|
|
|
|
showLanguage :: Language -> String
|
|
showLanguage = showCId
|
|
|
|
fidString, fidInt, fidFloat, fidVar :: FId
|
|
fidString = (-1)
|
|
fidInt = (-2)
|
|
fidFloat = (-3)
|
|
fidVar = (-4)
|
|
|
|
isPredefFId :: FId -> Bool
|
|
isPredefFId = (`elem` [fidString, fidInt, fidFloat, fidVar])
|