forked from GitHub/gf-core
293 lines
10 KiB
Haskell
293 lines
10 KiB
Haskell
----------------------------------------------------------------------
|
|
-- |
|
|
-- Module : Linear
|
|
-- Maintainer : AR
|
|
-- Stability : (stable)
|
|
-- Portability : (portable)
|
|
--
|
|
-- > CVS $Date: 2005/11/14 16:03:41 $
|
|
-- > CVS $Author: aarne $
|
|
-- > CVS $Revision: 1.19 $
|
|
--
|
|
-- Linearization for canonical GF. AR 7\/6\/2003
|
|
-----------------------------------------------------------------------------
|
|
|
|
module GF.UseGrammar.Linear where
|
|
|
|
import GF.Canon.GFC
|
|
import GF.Canon.AbsGFC
|
|
import qualified GF.Grammar.Abstract as A
|
|
import GF.Canon.MkGFC (rtQIdent) ----
|
|
import GF.Infra.Ident
|
|
import GF.Grammar.PrGrammar
|
|
import GF.Canon.CMacros
|
|
import GF.Canon.Look
|
|
import GF.Grammar.LookAbs
|
|
import GF.Grammar.MMacros
|
|
import GF.Grammar.TypeCheck (annotate) ----
|
|
import GF.Data.Str
|
|
import GF.Text.Text
|
|
----import TypeCheck -- to annotate
|
|
|
|
import GF.Data.Operations
|
|
import GF.Data.Zipper
|
|
import qualified GF.Infra.Modules as M
|
|
|
|
import Control.Monad
|
|
import Data.List (intersperse)
|
|
|
|
-- Linearization for canonical GF. AR 7/6/2003
|
|
|
|
-- | The worker function: linearize a Tree, return
|
|
-- a record. Possibly mark subtrees.
|
|
--
|
|
-- NB. Constants in trees are annotated by the name of the abstract module.
|
|
-- A concrete module name must be given to find (and choose) linearization rules.
|
|
--
|
|
-- - If no marking is wanted, 'noMark' :: 'Marker'.
|
|
--
|
|
-- - For xml marking, use 'markXML' :: 'Marker'
|
|
linearizeToRecord :: CanonGrammar -> Marker -> Ident -> A.Tree -> Err Term
|
|
linearizeToRecord gr mk m = lin [] where
|
|
|
|
lin ts t@(Tr (n,xs)) = errIn ("linearization of" +++ prt t) $ do
|
|
|
|
let binds = A.bindsNode n
|
|
at = A.atomNode n
|
|
fmk = markSubtree mk n ts (A.isFocusNode n)
|
|
c <- A.val2cat $ A.valNode n
|
|
xs' <- mapM (\ (i,x) -> lin (i:ts) x) $ zip [0..] xs
|
|
|
|
r <- case at of
|
|
A.AtC f -> lookf c t f >>= comp xs'
|
|
A.AtI i -> return $ recInt i
|
|
A.AtL s -> return $ recS $ tK $ prt at
|
|
A.AtF i -> return $ recS $ tK $ prt at
|
|
A.AtV x -> lookCat c >>= comp [tK (prt_ at)]
|
|
A.AtM m -> lookCat c >>= comp [tK (prt_ at)]
|
|
|
|
r' <- case r of -- to see stg in case the result is variants {}
|
|
FV [] -> lookCat c >>= comp [tK (prt_ t)]
|
|
_ -> return r
|
|
|
|
return $ fmk $ mkBinds binds r'
|
|
|
|
look = lookupLin gr . redirectIdent m . rtQIdent
|
|
comp = ccompute gr
|
|
mkBinds bs bdy = case bdy of
|
|
R fs -> R $ [Ass (LV i) (tK (prt t)) | (i,(t,_)) <- zip [0..] bs] ++ fs
|
|
FV rs -> FV $ map (mkBinds bs) rs
|
|
|
|
recS t = R [Ass (L (identC "s")) t] ----
|
|
|
|
recInt i = R [
|
|
Ass (L (identC "last")) (EInt (rem i 10)),
|
|
Ass (L (identC "s")) (tK $ show i),
|
|
Ass (L (identC "size")) (EInt (if i > 9 then 1 else 0))
|
|
]
|
|
|
|
lookCat = return . errVal defLindef . look
|
|
---- should always be given in the module
|
|
|
|
-- to show missing linearization as term
|
|
lookf c t f = case look f of
|
|
Ok h -> return h
|
|
_ -> lookCat c >>= comp [tK (prt_ t)]
|
|
|
|
|
|
-- | thus the special case:
|
|
linearizeNoMark :: CanonGrammar -> Ident -> A.Tree -> Err Term
|
|
linearizeNoMark gr = linearizeToRecord gr noMark
|
|
|
|
-- | expand tables in linearized term to full, normal-order tables
|
|
--
|
|
-- NB expand from inside-out so that values are not looked up in copies of branches
|
|
|
|
expandLinTables :: CanonGrammar -> Term -> Err Term
|
|
expandLinTables gr t = case t of
|
|
R rs -> liftM (R . map (uncurry Ass)) $ mapPairsM exp [(l,r) | Ass l r <- rs]
|
|
T ty rs -> do
|
|
rs' <- mapPairsM exp [(l,r) | Cas l r <- rs] -- expand from inside-out
|
|
let t' = T ty $ map (uncurry Cas) rs'
|
|
vs <- alls ty
|
|
ps <- mapM term2patt vs
|
|
ts' <- mapM (comp . S t') $ vs
|
|
return $ T ty [Cas [p] t | (p,t) <- zip ps ts']
|
|
V ty ts0 -> do
|
|
ts <- mapM exp ts0 -- expand from inside-out
|
|
vs <- alls ty
|
|
ps <- mapM term2patt vs
|
|
return $ T ty [Cas [p] t | (p,t) <- zip ps ts]
|
|
FV ts -> liftM FV $ mapM exp ts
|
|
_ -> composOp exp t
|
|
where
|
|
alls = allParamValues gr
|
|
exp = expandLinTables gr
|
|
comp = ccompute gr []
|
|
|
|
-- Do this for an entire grammar:
|
|
|
|
unoptimizeCanon :: CanonGrammar -> CanonGrammar
|
|
unoptimizeCanon g@(M.MGrammar ms) = M.MGrammar $ map (unoptimizeCanonMod g) ms
|
|
|
|
unoptimizeCanonMod :: CanonGrammar -> CanonModule -> CanonModule
|
|
unoptimizeCanonMod g = convMod where
|
|
convMod (m, M.ModMod (M.Module (M.MTConcrete a) x flags me os defs)) =
|
|
(m, M.ModMod (M.Module (M.MTConcrete a) x flags me os (mapTree convDef defs)))
|
|
convMod mm = mm
|
|
convDef (c,CncCat ty df pr) = (c,CncCat ty (convT df) (convT pr))
|
|
convDef (f,CncFun c xs li pr) = (f,CncFun c xs (convT li) (convT pr))
|
|
convDef cd = cd
|
|
convT = err error id . exp
|
|
-- a version of expandLinTables that does not destroy share optimization
|
|
exp t = case t of
|
|
R rs -> liftM (R . map (uncurry Ass)) $ mapPairsM exp [(l,r) | Ass l r <- rs]
|
|
T ty rs@[Cas [_] _] -> do
|
|
rs' <- mapPairsM exp [(l,r) | Cas l r <- rs] -- expand from inside-out
|
|
let t' = T ty $ map (uncurry Cas) rs'
|
|
vs <- alls ty
|
|
ps <- mapM term2patt vs
|
|
ts' <- mapM (comp . S t') $ vs
|
|
return $ T ty [Cas [p] t | (p,t) <- zip ps ts']
|
|
V ty ts0 -> do
|
|
ts <- mapM exp ts0 -- expand from inside-out
|
|
vs <- alls ty
|
|
ps <- mapM term2patt vs
|
|
return $ T ty [Cas [p] t | (p,t) <- zip ps ts]
|
|
FV ts -> liftM FV $ mapM exp ts
|
|
I _ -> comp t
|
|
_ -> composOp exp t
|
|
where
|
|
alls = allParamValues g
|
|
comp = ccompute g []
|
|
|
|
|
|
-- | from records, one can get to records of tables of strings
|
|
rec2strTables :: Term -> Err [[(Label,[([Patt],[Str])])]]
|
|
rec2strTables r = do
|
|
vs <- allLinValues r
|
|
mapM (mapPairsM (mapPairsM strsFromTerm)) vs
|
|
|
|
-- | from these tables, one may want to extract the ones for the "s" label
|
|
strTables2sTables :: [[(Label,[([Patt],[Str])])]] -> [[([Patt],[Str])]]
|
|
strTables2sTables ts = [t | r <- ts, (l,t) <- r, l == linLab0]
|
|
|
|
linLab0 :: Label
|
|
linLab0 = L (identC "s")
|
|
|
|
-- | to get lists of token lists is easy
|
|
sTables2strs :: [[([Patt],[Str])]] -> [[Str]]
|
|
sTables2strs = map snd . concat
|
|
|
|
-- | from this, to get a list of strings
|
|
strs2strings :: [[Str]] -> [String]
|
|
strs2strings = map unlex
|
|
|
|
-- | this is just unwords; use an unlexer from Text to postprocess
|
|
unlex :: [Str] -> String
|
|
unlex = concat . map sstr . take 1 ----
|
|
|
|
-- | finally, a top-level function to get a string from an expression
|
|
linTree2string :: Marker -> CanonGrammar -> Ident -> A.Tree -> String
|
|
linTree2string mk gr m e = head $ linTree2strings mk gr m e -- never empty
|
|
|
|
-- | you can also get many strings
|
|
linTree2strings :: Marker -> CanonGrammar -> Ident -> A.Tree -> [String]
|
|
linTree2strings mk gr m e = err return id $ do
|
|
t <- linearizeToRecord gr mk m e
|
|
r <- expandLinTables gr t
|
|
ts <- rec2strTables r
|
|
let ss = strs2strings $ sTables2strs $ strTables2sTables ts
|
|
ifNull (prtBad "empty linearization of" e) return ss -- thus never empty
|
|
|
|
-- | argument is a Tree, value is a list of strs; needed in Parsing
|
|
allLinsOfTree :: CanonGrammar -> Ident -> A.Tree -> [Str]
|
|
allLinsOfTree gr a e = err (singleton . str) id $ do
|
|
e' <- return e ---- annotateExp gr e
|
|
r <- linearizeNoMark gr a e'
|
|
r' <- expandLinTables gr r
|
|
ts <- rec2strTables r'
|
|
return $ concat $ sTables2strs $ strTables2sTables ts
|
|
|
|
-- | the value is a list of structures arranged as records of tables of terms
|
|
allLinsAsRec :: CanonGrammar -> Ident -> A.Tree -> Err [[(Label,[([Patt],Term)])]]
|
|
allLinsAsRec gr c t = linearizeNoMark gr c t >>= expandLinTables gr >>= allLinValues
|
|
|
|
-- | the value is a list of structures arranged as records of tables of strings
|
|
-- only taking into account string fields
|
|
-- True: sep. by /, False: sep by \n
|
|
allLinTables ::
|
|
Bool -> CanonGrammar ->Ident ->A.Tree ->Err [[(Label,[([Patt],[String])])]]
|
|
allLinTables slash gr c t = do
|
|
r' <- allLinsAsRec gr c t
|
|
mapM (mapM getS) r'
|
|
where
|
|
getS (lab,pss) = liftM (curry id lab) $ mapM gets pss
|
|
gets (ps,t) = liftM (curry id ps . cc . map str2strings) $ strsFromTerm t
|
|
cc = concat . intersperse [if slash then "/" else "\n"]
|
|
|
|
-- | the value is a list of strings gathered from all fields
|
|
|
|
allLinBranchFields :: CanonGrammar -> Ident -> A.Tree -> Err [String]
|
|
allLinBranchFields gr c trm = do
|
|
r <- linearizeNoMark gr c trm >>= expandLinTables gr
|
|
return [s | (_,t) <- allLinBranches r, s <- gets t]
|
|
where
|
|
gets t = concat [cc (map str2strings s) | Ok s <- [strsFromTerm t]]
|
|
cc = concat . intersperse ["/"]
|
|
|
|
prLinTable :: Bool -> [[(Label,[([Patt],[String])])]] -> [String]
|
|
prLinTable pars = concatMap prOne . concat where
|
|
prOne (lab,pss) = (if pars then ((prt lab) :) else id) (map pr pss) ----
|
|
pr (ps,ss) = (if pars then ((unwords (map prt_ ps) +++ ":") +++)
|
|
else id) (unwords ss)
|
|
|
|
{-
|
|
-- the value is a list of strs
|
|
allLinStrings :: CanonGrammar -> Tree -> [Str]
|
|
allLinStrings gr ft = case allLinsAsStrs gr ft of
|
|
Ok ts -> map snd $ concat $ map snd $ concat ts
|
|
Bad s -> [str s]
|
|
|
|
-- the value is a list of strs, not forgetting their arguments
|
|
allLinsAsStrs :: CanonGrammar -> Tree -> Err [[(Label,[([Patt],Str)])]]
|
|
allLinsAsStrs gr ft = do
|
|
lpts <- allLinearizations gr ft
|
|
return $ concat $ mapM (mapPairsM (mapPairsM strsFromTerm)) lpts
|
|
|
|
|
|
-- to a list of strings
|
|
linearizeToStrings :: CanonGrammar -> ([Int] ->Term -> Term) -> Tree -> Err [String]
|
|
linearizeToStrings gr mk = liftM (map unlex) . linearizeToStrss gr mk
|
|
|
|
-- to a list of token lists
|
|
linearizeToStrss :: CanonGrammar -> ([Int] -> Term -> Term) -> Tree -> Err [[Str]]
|
|
linearizeToStrss gr mk e = do
|
|
R rs <- linearizeToRecord gr mk e ----
|
|
t <- lookupErr linLab0 [(r,s) | Ass r s <- rs]
|
|
return $ map strsFromTerm $ allInTable t
|
|
-}
|
|
|
|
-- | the value is a list of strings, not forgetting their arguments
|
|
allLinsOfFun :: CanonGrammar -> CIdent -> Err [[(Label,[([Patt],Term)])]]
|
|
allLinsOfFun gr f = do
|
|
t <- lookupLin gr f
|
|
allAllLinValues t --- all fields, not only s. 11/12/2005
|
|
|
|
|
|
-- | returns printname if one exists; otherwise linearizes with metas
|
|
printOrLinearize :: CanonGrammar -> Ident -> A.Fun -> String
|
|
printOrLinearize gr c f@(m, d) = errVal (prt fq) $
|
|
case lookupPrintname gr (CIQ c d) of
|
|
Ok t -> do
|
|
ss <- strsFromTerm t
|
|
let s = strs2strings [ss]
|
|
return $ ifNull (prt fq) head s
|
|
_ -> do
|
|
ty <- lookupFunType gr m d
|
|
f' <- ref2exp [] ty (A.QC m d)
|
|
tr <- annotate gr f'
|
|
return $ linTree2string noMark gr c tr
|
|
where
|
|
fq = CIQ m d
|