1
0
forked from GitHub/gf-core
Files
gf-core/src/GF/Compile/ShellState.hs
2006-12-28 16:45:57 +00:00

566 lines
21 KiB
Haskell

----------------------------------------------------------------------
-- |
-- Module : ShellState
-- Maintainer : AR
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/11/14 16:03:41 $
-- > CVS $Author: aarne $
-- > CVS $Revision: 1.53 $
--
-- (Description of the module)
-----------------------------------------------------------------------------
module GF.Compile.ShellState where
import GF.Data.Operations
import GF.Canon.GFC
import GF.Canon.AbsGFC
import GF.Canon.CanonToGFCC as C2GFCC
import GF.Grammar.Macros
import GF.Grammar.MMacros
import GF.Canon.Look
import GF.Canon.Subexpressions
import GF.Grammar.LookAbs
import GF.Compile.ModDeps
import GF.Compile.Evaluate
import qualified GF.Infra.Modules as M
import qualified GF.Grammar.Grammar as G
import qualified GF.Grammar.PrGrammar as P
import GF.CF.CF
import GF.CF.CFIdent
import GF.CF.CanonToCF
import GF.UseGrammar.Morphology
import GF.Probabilistic.Probabilistic
import GF.Compile.NoParse
import GF.Infra.Option
import GF.Infra.Ident
import GF.Infra.UseIO (justModuleName)
import GF.System.Arch (ModTime)
import qualified Transfer.InterpreterAPI as T
import qualified GF.OldParsing.ConvertGrammar as CnvOld -- OBSOLETE
import qualified GF.Conversion.GFC as Cnv
import qualified GF.Conversion.SimpleToFCFG as FCnv
import qualified GF.Parsing.GFC as Prs
import Control.Monad (mplus)
import Data.List (nub,nubBy)
import qualified Data.Map as Map
import Data.Maybe (fromMaybe)
-- AR 11/11/2001 -- 17/6/2003 (for modules) ---- unfinished
-- | multilingual state with grammars and options
data ShellState = ShSt {
abstract :: Maybe Ident , -- ^ pointer to actual abstract, if not empty st
concrete :: Maybe Ident , -- ^ pointer to primary concrete
concretes :: [((Ident,Ident),Bool)], -- ^ list of all concretes, and whether active
canModules :: CanonGrammar , -- ^ compiled abstracts and concretes
srcModules :: G.SourceGrammar , -- ^ saved resource modules
cfs :: [(Ident,CF)] , -- ^ context-free grammars (small, no parameters, very over-generating)
abstracts :: [(Ident,[Ident])], -- ^ abstracts and their associated concretes
mcfgs :: [(Ident, Cnv.MGrammar)], -- ^ MCFG, converted according to Ljunglöf (2004, ch 3)
fcfgs :: [(Ident, Cnv.FGrammar)], -- ^ FCFG, optimized MCFG by Krasimir Angelov
cfgs :: [(Ident, Cnv.CGrammar)], -- ^ CFG, converted from mcfg
-- (large, with parameters, no-so overgenerating)
pInfos :: [(Ident, Prs.PInfo)], -- ^ parsing information (compiled mcfg&cfg grammars)
morphos :: [(Ident,Morpho)], -- ^ morphologies
treebanks :: [(Ident,Treebank)], -- ^ treebanks
probss :: [(Ident,Probs)], -- ^ probability distributions
gloptions :: Options, -- ^ global options
readFiles :: [(String,(FilePath,ModTime))],-- ^ files read
absCats :: [(G.Cat,(G.Context,
[(G.Fun,G.Type)],
[((G.Fun,Int),G.Type)]))], -- ^ cats, (their contexts,
-- functions to them,
-- functions on them)
statistics :: [Statistics], -- ^ statistics on grammars
transfers :: [(Ident,T.Env)], -- ^ transfer modules
evalEnv :: EEnv -- ^ evaluation environment
}
type Treebank = Map.Map String [String] -- string, trees
actualConcretes :: ShellState -> [((Ident,Ident),Bool)]
actualConcretes sh = nub [((c,c),b) |
Just a <- [abstract sh],
((c,_),_) <- concretes sh, ----concretesOfAbstract sh a,
let b = True -----
]
concretesOfAbstract :: ShellState -> Ident -> [Ident]
concretesOfAbstract sh a = [c | (b,cs) <- abstracts sh, b == a, c <- cs]
data Statistics =
StDepTypes Bool -- ^ whether there are dependent types
| StBoundVars [G.Cat] -- ^ which categories have bound variables
--- -- etc
deriving (Eq,Ord)
emptyShellState :: ShellState
emptyShellState = ShSt {
abstract = Nothing,
concrete = Nothing,
concretes = [],
canModules = M.emptyMGrammar,
srcModules = M.emptyMGrammar,
cfs = [],
abstracts = [],
mcfgs = [],
fcfgs = [],
cfgs = [],
pInfos = [],
morphos = [],
treebanks = [],
probss = [],
gloptions = noOptions,
readFiles = [],
absCats = [],
statistics = [],
transfers = [],
evalEnv = emptyEEnv
}
optInitShellState :: Options -> ShellState
optInitShellState os = addGlobalOptions os emptyShellState
type Language = Ident
language :: String -> Language
language = identC
prLanguage :: Language -> String
prLanguage = prIdent
-- | grammar for one language in a state, comprising its abs and cnc
data StateGrammar = StGr {
absId :: Ident,
cncId :: Ident,
grammar :: CanonGrammar,
cf :: CF,
mcfg :: Cnv.MGrammar,
fcfg :: Cnv.FGrammar,
cfg :: Cnv.CGrammar,
pInfo :: Prs.PInfo,
morpho :: Morpho,
probs :: Probs,
loptions :: Options
}
emptyStateGrammar :: StateGrammar
emptyStateGrammar = StGr {
absId = identC "#EMPTY", ---
cncId = identC "#EMPTY", ---
grammar = M.emptyMGrammar,
cf = emptyCF,
mcfg = [],
fcfg = [],
cfg = [],
pInfo = Prs.buildPInfo [] [] [],
morpho = emptyMorpho,
probs = emptyProbs,
loptions = noOptions
}
-- analysing shell grammar into parts
stateGrammarST :: StateGrammar -> CanonGrammar
stateCF :: StateGrammar -> CF
stateMCFG :: StateGrammar -> Cnv.MGrammar
stateFCFG :: StateGrammar -> Cnv.FGrammar
stateCFG :: StateGrammar -> Cnv.CGrammar
statePInfo :: StateGrammar -> Prs.PInfo
stateMorpho :: StateGrammar -> Morpho
stateProbs :: StateGrammar -> Probs
stateOptions :: StateGrammar -> Options
stateGrammarWords :: StateGrammar -> [String]
stateGrammarLang :: StateGrammar -> (CanonGrammar, Ident)
stateGrammarST = grammar
stateCF = cf
stateMCFG = mcfg
stateFCFG = fcfg
stateCFG = cfg
statePInfo = pInfo
stateMorpho = morpho
stateProbs = probs
stateOptions = loptions
stateGrammarWords = allMorphoWords . stateMorpho
stateGrammarLang st = (grammar st, cncId st)
---- this should be computed at compile time and stored
stateHasHOAS :: StateGrammar -> Bool
stateHasHOAS = hasHOAS . stateGrammarST
cncModuleIdST :: StateGrammar -> CanonGrammar
cncModuleIdST = stateGrammarST
-- | form a shell state from a canonical grammar
grammar2shellState :: Options -> (CanonGrammar, G.SourceGrammar) -> Err ShellState
grammar2shellState opts (gr,sgr) =
updateShellState opts doParseAll Nothing emptyShellState ((0,sgr,gr,emptyEEnv),[]) --- is 0 safe?
-- | update a shell state from a canonical grammar
updateShellState :: Options -> NoParse -> Maybe Ident -> ShellState ->
((Int,G.SourceGrammar,CanonGrammar,EEnv),[(String,(FilePath,ModTime))]) ->
Err ShellState
updateShellState opts ign mcnc sh ((_,sgr,gr,eenv),rts) = do
let cgr0 = M.updateMGrammar (canModules sh) gr
-- a0 = abstract of old state
-- a1 = abstract of compiled grammar
let a0 = abstract sh
a1 <- return $ case mcnc of
Just cnc -> err (const Nothing) Just $ M.abstractOfConcrete cgr0 cnc
_ -> M.greatestAbstract cgr0
-- abstr0 = a1 if it exists
let (abstr0,isNew) = case (a0,a1) of
(Just a, Just b) | a /= b -> (a1, True)
(Nothing, Just _) -> (a1, True)
_ -> (a0, False)
let concrs0 = maybe [] (M.allConcretes cgr0) abstr0
let abstrs = nubBy (\ (x,_) (y,_) -> x == y) $
maybe id (\a -> ((a,concrs0):)) abstr0 $ abstracts sh
let needed = nub $ concatMap (requiredCanModules (length abstrs == 1) cgr0) (maybe [] singleton abstr0 ++ concrs0)
purge = nubBy (\x y -> fst x == fst y) . filter (\(m,mo) -> elem m needed && not (isIncompleteCanon (m,mo)))
let cgr = M.MGrammar $ purge $ M.modules cgr0
let oldConcrs = map (snd . fst) (concretes sh)
newConcrs = maybe [] (M.allConcretes gr) abstr0
toRetain (c,v) = notElem c newConcrs
let complete m = case M.lookupModule gr m of
Ok mo -> not $ isIncompleteCanon (m,mo)
_ -> False
let concrs = filter (\i -> complete i && elem i needed) $ nub $ newConcrs ++ oldConcrs
concr0 = ifNull Nothing (return . head) concrs
notInrts f = notElem f $ map fst rts
subcgr = unSubelimCanon cgr
cf's0 <- if (not (oElem (iOpt "docf") opts) && -- cf only built with -docf
(oElem noCF opts || not (hasHOAS cgr))) -- or HOAS, if not -nocf
then return $ map snd $ cfs sh
else mapM (canon2cf opts ign subcgr) newConcrs
let cf's = zip newConcrs cf's0 ++ filter toRetain (cfs sh)
let morphs = [(c,mkMorpho subcgr c) | c <- newConcrs] ++ filter toRetain (morphos sh)
let probss = [] -----
let fromGFC = snd . snd . Cnv.convertGFC opts
(mcfgs, cfgs) = unzip $ map (curry fromGFC cgr) concrs
fcfgs = FCnv.convertGrammar (C2GFCC.mkCanon2gfcc cgr)
pInfos = zipWith3 Prs.buildPInfo mcfgs (map snd fcfgs) cfgs
let funs = funRulesOf cgr
let cats = allCatsOf cgr
let csi = [(c,(co,
[(fun,typ) | (fun,typ) <- funs, compatType tc typ],
funsOnTypeFs compatType funs tc))
| (c,co) <- cats, let tc = cat2val co c]
let deps = True ---- not $ null $ allDepCats cgr
let binds = [] ---- allCatsWithBind cgr
let src = M.updateMGrammar (srcModules sh) sgr
return $ ShSt {
abstract = abstr0,
concrete = concr0,
concretes = zip (zip concrs concrs) (repeat True),
canModules = cgr,
srcModules = src,
cfs = cf's,
abstracts = maybe [] (\a -> [(a,concrs)]) abstr0,
mcfgs = zip concrs mcfgs,
fcfgs = fcfgs,
cfgs = zip concrs cfgs,
pInfos = zip concrs pInfos,
morphos = morphs,
treebanks = treebanks sh,
probss = zip concrs probss,
gloptions = gloptions sh, --- opts, -- this would be command-line options
readFiles = [ft | ft@(f,(_,_)) <- readFiles sh, notInrts f] ++ rts,
absCats = csi,
statistics = [StDepTypes deps,StBoundVars binds],
transfers = transfers sh,
evalEnv = eenv
}
prShellStateInfo :: ShellState -> String
prShellStateInfo sh = unlines [
"main abstract : " +++ abstractName sh,
"main concrete : " +++ maybe "(none)" P.prt (concrete sh),
"actual concretes : " +++ unwords (map (P.prt . fst . fst) (actualConcretes sh)),
"all abstracts : " +++ unwords (map (P.prt . fst) (abstracts sh)),
"all concretes : " +++ unwords (map (P.prt . fst . fst) (concretes sh)),
"canonical modules :" +++ unwords (map (P.prt .fst) (M.modules (canModules sh))),
"source modules : " +++ unwords (map (P.prt .fst) (M.modules (srcModules sh))),
"global options : " +++ prOpts (gloptions sh),
"transfer modules : " +++ unwords (map (P.prt . fst) (transfers sh)),
"treebanks : " +++ unwords (map (P.prt . fst) (treebanks sh))
]
abstractName :: ShellState -> String
abstractName sh = maybe "(none)" P.prt (abstract sh)
-- | throw away those abstracts that are not needed --- could be more aggressive
filterAbstracts :: [Ident] -> CanonGrammar -> CanonGrammar
filterAbstracts absts cgr = M.MGrammar (nubBy (\x y -> fst x == fst y) [m | m <- ms, needed m]) where
ms = M.modules cgr
needed (i,_) = elem i needs
needs = [i | (i,M.ModMod m) <- ms, not (M.isModAbs m) || any (dep i) absts]
dep i a = elem i (ext mse a)
mse = [(i,me) | (i,M.ModMod m) <- ms, M.isModAbs m, me <- [M.extends m]]
ext es a = case lookup a es of
Just e -> a : concatMap (ext es) e ---- FIX multiple exts
_ -> []
purgeShellState :: ShellState -> ShellState
purgeShellState sh = ShSt {
abstract = abstr,
concrete = concrete sh,
concretes = concrs,
canModules = M.MGrammar $ filter complete $ purge $ M.modules $ canModules sh,
srcModules = M.emptyMGrammar,
cfs = cfs sh,
abstracts = maybe [] (\a -> [(a,map (snd . fst) concrs)]) abstr,
mcfgs = mcfgs sh,
fcfgs = fcfgs sh,
cfgs = cfgs sh,
pInfos = pInfos sh,
morphos = morphos sh,
treebanks = treebanks sh,
probss = probss sh,
gloptions = gloptions sh,
readFiles = [],
absCats = absCats sh,
statistics = statistics sh,
transfers = transfers sh,
evalEnv = emptyEEnv
}
where
abstr = abstract sh
concrs = [((a,i),b) | ((a,i),b) <- concretes sh, elem i needed]
isSingle = length (abstracts sh) == 1
needed = nub $ concatMap (requiredCanModules isSingle (canModules sh)) acncs
purge = nubBy (\x y -> fst x == fst y) . filter (flip elem needed . fst)
acncs = maybe [] singleton abstr ++ map (snd . fst) (actualConcretes sh)
complete = not . isIncompleteCanon
changeMain :: Maybe Ident -> ShellState -> Err ShellState
changeMain Nothing (ShSt _ _ cs ms ss cfs old_pis mcfgs fcfgs cfgs pinfos mos tbs pbs os rs acs s trs ee) =
return (ShSt Nothing Nothing [] ms ss cfs old_pis mcfgs fcfgs cfgs pinfos mos tbs pbs os rs acs s trs ee)
changeMain
(Just c) st@(ShSt _ _ cs ms ss cfs old_pis mcfgs fcfgs cfgs pinfos mos tbs pbs os rs acs s trs ee) =
case lookup c (M.modules ms) of
Just _ -> do
a <- M.abstractOfConcrete ms c
let cas = M.allConcretes ms a
let cs' = [((c,c),True) | c <- cas]
return (ShSt (Just a) (Just c) cs' ms ss cfs old_pis mcfgs fcfgs cfgs
pinfos mos tbs pbs os rs acs s trs ee)
_ -> P.prtBad "The state has no concrete syntax named" c
-- | form just one state grammar, if unique, from a canonical grammar
grammar2stateGrammar :: Options -> CanonGrammar -> Err StateGrammar
grammar2stateGrammar opts gr = do
st <- grammar2shellState opts (gr,M.emptyMGrammar)
concr <- maybeErr "no concrete syntax" $ concrete st
return $ stateGrammarOfLang st concr
resourceOfShellState :: ShellState -> Maybe Ident
resourceOfShellState = M.greatestResource . srcModules
qualifTop :: StateGrammar -> G.QIdent -> G.QIdent
qualifTop gr (_,c) = (absId gr,c)
stateGrammarOfLang :: ShellState -> Language -> StateGrammar
stateGrammarOfLang = stateGrammarOfLangOpt True
stateGrammarOfLangOpt :: Bool -> ShellState -> Language -> StateGrammar
stateGrammarOfLangOpt purg st0 l = StGr {
absId = err (const (identC "Abs")) id $ M.abstractOfConcrete allCan l, ---
cncId = l,
grammar = allCan,
cf = maybe emptyCF id (lookup l (cfs st)),
mcfg = maybe [] id $ lookup l $ mcfgs st,
fcfg = maybe [] id $ lookup l $ fcfgs st,
cfg = maybe [] id $ lookup l $ cfgs st,
pInfo = maybe (Prs.buildPInfo [] [] []) id $ lookup l $ pInfos st,
morpho = maybe emptyMorpho id (lookup l (morphos st)),
probs = maybe emptyProbs id (lookup l (probss st)),
loptions = errVal noOptions $ lookupOptionsCan allCan
}
where
st = (if purg then purgeShellState else id) $ errVal st0 $ changeMain (Just l) st0
allCan = canModules st
grammarOfLang :: ShellState -> Language -> CanonGrammar
cfOfLang :: ShellState -> Language -> CF
morphoOfLang :: ShellState -> Language -> Morpho
probsOfLang :: ShellState -> Language -> Probs
optionsOfLang :: ShellState -> Language -> Options
grammarOfLang st = stateGrammarST . stateGrammarOfLang st
cfOfLang st = stateCF . stateGrammarOfLang st
morphoOfLang st = stateMorpho . stateGrammarOfLang st
probsOfLang st = stateProbs . stateGrammarOfLang st
optionsOfLang st = stateOptions . stateGrammarOfLang st
removeLang :: Language -> ShellState -> ShellState
removeLang lang st = purgeShellState $ st{concretes = concs1} where
concs1 = filter ((/=lang) . snd . fst) $ concretes st
-- | the last introduced grammar, stored in options, is the default for operations
firstStateGrammar :: ShellState -> StateGrammar
firstStateGrammar st = errVal (stateAbstractGrammar st) $ do
concr <- maybeErr "no concrete syntax" $ concrete st
return $ stateGrammarOfLang st concr
mkStateGrammar :: ShellState -> Language -> StateGrammar
mkStateGrammar = stateGrammarOfLang
stateAbstractGrammar :: ShellState -> StateGrammar
stateAbstractGrammar st = StGr {
absId = maybe (identC "Abs") id (abstract st), ---
cncId = identC "#Cnc", ---
grammar = canModules st, ---- only abstarct ones
cf = emptyCF,
mcfg = [],
fcfg = [],
cfg = [],
pInfo = Prs.buildPInfo [] [] [],
morpho = emptyMorpho,
probs = emptyProbs,
loptions = gloptions st ----
}
-- analysing shell state into parts
globalOptions :: ShellState -> Options
allLanguages :: ShellState -> [Language]
allTransfers :: ShellState -> [Ident]
allCategories :: ShellState -> [G.Cat]
allStateGrammars :: ShellState -> [StateGrammar]
allStateGrammarsWithNames :: ShellState -> [(Language, StateGrammar)]
allGrammarFileNames :: ShellState -> [String]
allActiveStateGrammarsWithNames :: ShellState -> [(Language, StateGrammar)]
allActiveGrammars :: ShellState -> [StateGrammar]
globalOptions = gloptions
--allLanguages = map (fst . fst) . concretes
allLanguages = map (snd . fst) . actualConcretes
allTransfers = map fst . transfers
allCategories = map fst . allCatsOf . canModules
allStateGrammars = map snd . allStateGrammarsWithNames
allStateGrammarsWithNames st =
[(c, mkStateGrammar st c) | ((c,_),_) <- actualConcretes st]
allGrammarFileNames st = [prLanguage c ++ ".gf" | ((c,_),_) <- actualConcretes st]
allActiveStateGrammarsWithNames st =
[(c, mkStateGrammar st c) | ((c,_),True) <- concretes st] --- actual
allActiveGrammars = map snd . allActiveStateGrammarsWithNames
pathOfModule :: ShellState -> Ident -> FilePath
pathOfModule sh m = maybe "module not found" fst $ lookup (P.prt m) $ readFiles sh
-- command-line option -lang=foo overrides the actual grammar in state
grammarOfOptState :: Options -> ShellState -> StateGrammar
grammarOfOptState opts st =
maybe (firstStateGrammar st) (stateGrammarOfLang st . language) $
getOptVal opts useLanguage
languageOfOptState :: Options -> ShellState -> Maybe Language
languageOfOptState opts st =
maybe (concrete st) (return . language) $ getOptVal opts useLanguage
-- | command-line option -cat=foo overrides the possible start cat of a grammar
firstCatOpts :: Options -> StateGrammar -> CFCat
firstCatOpts opts sgr =
maybe (stateFirstCat sgr) (string2CFCat (P.prt (absId sgr))) $
getOptVal opts firstCat
-- | the first cat for random generation
firstAbsCat :: Options -> StateGrammar -> G.QIdent
firstAbsCat opts = cfCat2Cat . firstCatOpts opts
-- | Gets the start category for the grammar from the options.
-- If the startcat is not set in the options, we look
-- for a flag in the grammar. If there is no flag in the
-- grammar, S is returned.
startCatStateOpts :: Options -> StateGrammar -> CFCat
startCatStateOpts opts sgr =
string2CFCat a (fromMaybe "S" (optsStartCat `mplus` grStartCat))
where optsStartCat = getOptVal opts gStartCat
grStartCat = getOptVal (stateOptions sgr) gStartCat
a = P.prt (absId sgr)
-- | a grammar can have start category as option startcat=foo ; default is S
stateFirstCat :: StateGrammar -> CFCat
stateFirstCat = startCatStateOpts noOptions
stateIsWord :: StateGrammar -> String -> Bool
stateIsWord sg = isKnownWord (stateMorpho sg)
addProbs :: (Ident,Probs) -> ShellState -> Err ShellState
addProbs ip@(lang,probs) sh = do
let gr = grammarOfLang sh lang
probs' <- checkGrammarProbs gr probs
let pbs' = (lang,probs') : filter ((/= lang) . fst) (probss sh)
return $ sh{probss = pbs'}
addTransfer :: (Ident,T.Env) -> ShellState -> ShellState
addTransfer it@(i,_) sh =
sh {transfers = it : filter ((/= i) . fst) (transfers sh)}
addTreebanks :: [(Ident,Treebank)] -> ShellState -> ShellState
addTreebanks its sh = sh {treebanks = its ++ treebanks sh}
findTreebank :: ShellState -> Ident -> Err Treebank
findTreebank sh i = maybeErr "no treebank found" $ lookup i $ treebanks sh
-- modify state
type ShellStateOper = ShellState -> ShellState
type ShellStateOperErr = ShellState -> Err ShellState
reinitShellState :: ShellStateOper
reinitShellState = const emptyShellState
languageOn, languageOff :: Language -> ShellStateOper
languageOn = languageOnOff True
languageOff = languageOnOff False
languageOnOff :: Bool -> Language -> ShellStateOper
--- __________ this is OBSOLETE
languageOnOff b lang sh = sh {concretes = cs'} where
cs' = [if lang==l then (lc,b) else i | i@(lc@(l,c),_) <- concretes sh]
changeOptions :: (Options -> Options) -> ShellStateOper
--- __________ this is OBSOLETE
changeOptions f sh = sh {gloptions = f (gloptions sh)}
addGlobalOptions :: Options -> ShellStateOper
addGlobalOptions = changeOptions . addOptions
removeGlobalOptions :: Options -> ShellStateOper
removeGlobalOptions = changeOptions . removeOptions