Files
gf-core/src/runtime/haskell/PGF/Macros.hs
2013-09-03 08:58:04 +00:00

228 lines
9.0 KiB
Haskell

module PGF.Macros where
import PGF.CId
import PGF.Data
import Control.Monad
import qualified Data.Map as Map
import qualified Data.Set as Set
import qualified Data.IntMap as IntMap
import qualified Data.IntSet as IntSet
import qualified Data.Array as Array
import Data.Maybe
import Data.List
import Data.Array.IArray
import Text.PrettyPrint
-- operations for manipulating PGF grammars and objects
mapConcretes :: (Concr -> Concr) -> PGF -> PGF
mapConcretes f pgf = pgf { concretes = Map.map f (concretes pgf) }
lookType :: Abstr -> CId -> Type
lookType abs f =
case lookMap (error $ "lookType " ++ show f) f (funs abs) of
(ty,_,_,_,_) -> ty
lookDef :: Abstr -> CId -> Maybe [Equation]
lookDef abs f =
case lookMap (error $ "lookDef " ++ show f) f (funs abs) of
(_,a,eqs,_,_) -> eqs
isData :: Abstr -> CId -> Bool
isData abs f =
case Map.lookup f (funs abs) of
Just (_,_,Nothing,_,_) -> True -- the encoding of data constrs
_ -> False
lookValCat :: Abstr -> CId -> CId
lookValCat abs = valCat . lookType abs
lookStartCat :: PGF -> CId
lookStartCat pgf = mkCId $
case msum $ Data.List.map (Map.lookup (mkCId "startcat")) [gflags pgf, aflags (abstract pgf)] of
Just (LStr s) -> s
_ -> "S"
lookGlobalFlag :: PGF -> CId -> Maybe Literal
lookGlobalFlag pgf f = Map.lookup f (gflags pgf)
lookAbsFlag :: PGF -> CId -> Maybe Literal
lookAbsFlag pgf f = Map.lookup f (aflags (abstract pgf))
lookConcr :: PGF -> Language -> Concr
lookConcr pgf cnc =
lookMap (error $ "Missing concrete syntax: " ++ showCId cnc) cnc $ concretes pgf
-- use if name fails, use abstract + name; so e.g. "Eng" becomes "DemoEng"
lookConcrComplete :: PGF -> CId -> Concr
lookConcrComplete pgf cnc =
case Map.lookup cnc (concretes pgf) of
Just c -> c
_ -> lookConcr pgf (mkCId (showCId (absname pgf) ++ showCId cnc))
lookConcrFlag :: PGF -> CId -> CId -> Maybe Literal
lookConcrFlag pgf lang f = Map.lookup f $ cflags $ lookConcr pgf lang
functionsToCat :: PGF -> CId -> [(CId,Type)]
functionsToCat pgf cat =
[(f,ty) | (_,f) <- fs, Just (ty,_,_,_,_) <- [Map.lookup f $ funs $ abstract pgf]]
where
(_,fs,_) = lookMap ([],[],0) cat $ cats $ abstract pgf
-- | List of functions that lack linearizations in the given language.
missingLins :: PGF -> Language -> [CId]
missingLins pgf lang = [c | c <- fs, not (hasl c)] where
fs = Map.keys $ funs $ abstract pgf
hasl = hasLin pgf lang
hasLin :: PGF -> Language -> CId -> Bool
hasLin pgf lang f = Map.member f $ lproductions $ lookConcr pgf lang
restrictPGF :: (CId -> Bool) -> PGF -> PGF
restrictPGF cond pgf = pgf {
abstract = abstr {
funs = Map.filterWithKey (\c _ -> cond c) (funs abstr),
cats = Map.map (\(hyps,fs,addr) -> (hyps,filter (cond . snd) fs,addr)) (cats abstr)
}
} ---- restrict concrs also, might be needed
where
abstr = abstract pgf
depth :: Expr -> Int
depth (EAbs _ _ t) = depth t
depth (EApp e1 e2) = max (depth e1) (depth e2) + 1
depth _ = 1
cftype :: [CId] -> CId -> Type
cftype args val = DTyp [(Explicit,wildCId,cftype [] arg) | arg <- args] val []
typeOfHypo :: Hypo -> Type
typeOfHypo (_,_,ty) = ty
catSkeleton :: Type -> ([CId],CId)
catSkeleton ty = case ty of
DTyp hyps val _ -> ([valCat (typeOfHypo h) | h <- hyps],val)
typeSkeleton :: Type -> ([(Int,CId)],CId)
typeSkeleton ty = case ty of
DTyp hyps val _ -> ([(contextLength ty, valCat ty) | h <- hyps, let ty = typeOfHypo h],val)
valCat :: Type -> CId
valCat ty = case ty of
DTyp _ val _ -> val
contextLength :: Type -> Int
contextLength ty = case ty of
DTyp hyps _ _ -> length hyps
-- | Show the printname of function or category
showPrintName :: PGF -> Language -> CId -> String
showPrintName pgf lang id = lookMap (showCId id) id $ printnames $ lookMap (error "no lang") lang $ concretes pgf
-- lookup with default value
lookMap :: Ord i => a -> i -> Map.Map i a -> a
lookMap d c m = Map.findWithDefault d c m
--- from Operations
combinations :: [[a]] -> [[a]]
combinations t = case t of
[] -> [[]]
aa:uu -> [a:u | a <- aa, u <- combinations uu]
cidString = mkCId "String"
cidInt = mkCId "Int"
cidFloat = mkCId "Float"
cidVar = mkCId "__gfVar"
-- Utilities for doing linearization
-- | BracketedString represents a sentence that is linearized
-- as usual but we also want to retain the ''brackets'' that
-- mark the beginning and the end of each constituent.
data BracketedString
= Leaf Token -- ^ this is the leaf i.e. a single token
| Bracket CId {-# UNPACK #-} !FId {-# UNPACK #-} !LIndex CId [Expr] [BracketedString]
-- ^ this is a bracket. The 'CId' is the category of
-- the phrase. The 'FId' is an unique identifier for
-- every phrase in the sentence. For context-free grammars
-- i.e. without discontinuous constituents this identifier
-- is also unique for every bracket. When there are discontinuous
-- phrases then the identifiers are unique for every phrase but
-- not for every bracket since the bracket represents a constituent.
-- The different constituents could still be distinguished by using
-- the constituent index i.e. 'LIndex'. If the grammar is reduplicating
-- then the constituent indices will be the same for all brackets
-- that represents the same constituent.
data BracketedTokn
= LeafKS [Token]
| LeafKP [Token] [Alternative]
| Bracket_ CId {-# UNPACK #-} !FId {-# UNPACK #-} !LIndex CId [Expr] [BracketedTokn] -- Invariant: the list is not empty
deriving Eq
type LinTable = ([CId],Array.Array LIndex [BracketedTokn])
-- | Renders the bracketed string as string where
-- the brackets are shown as @(S ...)@ where
-- @S@ is the category.
showBracketedString :: BracketedString -> String
showBracketedString = render . ppBracketedString
ppBracketedString (Leaf t) = text t
ppBracketedString (Bracket cat fid index _ _ bss) = parens (ppCId cat <> colon <> int fid <+> hsep (map ppBracketedString bss))
-- | The length of the bracketed string in number of tokens.
lengthBracketedString :: BracketedString -> Int
lengthBracketedString (Leaf _) = 1
lengthBracketedString (Bracket _ _ _ _ _ bss) = sum (map lengthBracketedString bss)
untokn :: Maybe String -> BracketedTokn -> (Maybe String,[BracketedString])
untokn nw (LeafKS ts) = (has_tok nw ts,map Leaf ts)
untokn nw (LeafKP d vs) = let ts = filter (not . null) (sel d vs nw)
in (has_tok nw ts,map Leaf ts)
where
sel d vs Nothing = d
sel d vs (Just w) =
case [v | Alt v cs <- vs, any (\c -> isPrefixOf c w) cs] of
v:_ -> v
_ -> d
untokn nw (Bracket_ cat fid index fun es bss) =
let (nw',bss') = mapAccumR untokn nw bss
in (nw',[Bracket cat fid index fun es (concat bss')])
has_tok nw [] = nw
has_tok nw (t:ts) = Just t
type CncType = (CId, FId) -- concrete type is the abstract type (the category) + the forest id
mkLinTable :: Concr -> (CncType -> Bool) -> [CId] -> FunId -> [(CncType,CId,[Expr],LinTable)] -> LinTable
mkLinTable cnc filter xs funid args = (xs,listArray (bounds lins) [computeSeq filter (elems (sequences cnc ! seqid)) args | seqid <- elems lins])
where
(CncFun _ lins) = cncfuns cnc ! funid
computeSeq :: (CncType -> Bool) -> [Symbol] -> [(CncType,CId,[Expr],LinTable)] -> [BracketedTokn]
computeSeq filter seq args = concatMap compute seq
where
compute (SymCat d r) = getArg d r
compute (SymLit d r) = getArg d r
compute (SymVar d r) = getVar d r
compute (SymKS ts) = [LeafKS ts]
compute (SymKP ts alts) = [LeafKP ts alts]
getArg d r
| not (null arg_lin) &&
filter ct = [Bracket_ cat fid r fun es arg_lin]
| otherwise = arg_lin
where
arg_lin = lin ! r
(ct@(cat,fid),fun,es,(xs,lin)) = args !! d
getVar d r = [LeafKS [showCId (xs !! r)]]
where
(ct,fun,es,(xs,lin)) = args !! d
flattenBracketedString :: BracketedString -> [String]
flattenBracketedString (Leaf w) = [w]
flattenBracketedString (Bracket _ _ _ _ _ bss) = concatMap flattenBracketedString bss