----------------------------------------------------------------------
-- |
-- Module      : Linear
-- Maintainer  : AR
-- Stability   : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/11/14 16:03:41 $ 
-- > CVS $Author: aarne $
-- > CVS $Revision: 1.19 $
--
-- Linearization for canonical GF. AR 7\/6\/2003
-----------------------------------------------------------------------------

module GF.UseGrammar.Linear where

import GF.Canon.GFC
import GF.Canon.AbsGFC
import qualified GF.Grammar.Abstract as A
import GF.Canon.MkGFC (rtQIdent) ----
import GF.Infra.Ident
import GF.Grammar.PrGrammar
import GF.Canon.CMacros
import GF.Canon.Look
import GF.Grammar.LookAbs
import GF.Grammar.MMacros
import GF.Grammar.TypeCheck (annotate) ----
import GF.Data.Str
import GF.Text.Text
----import TypeCheck -- to annotate

import GF.Data.Operations
import GF.Data.Zipper
import qualified GF.Infra.Modules as M

import Control.Monad
import Data.List (intersperse)

-- Linearization for canonical GF. AR 7/6/2003

-- | The worker function: linearize a Tree, return
-- a record. Possibly mark subtrees.
--
-- NB. Constants in trees are annotated by the name of the abstract module.
-- A concrete module name must be given to find (and choose) linearization rules.
--
--  - If no marking is wanted, 'noMark'  :: 'Marker'.
--
--  - For xml marking, use     'markXML' :: 'Marker'
linearizeToRecord :: CanonGrammar -> Marker -> Ident -> A.Tree -> Err Term
linearizeToRecord gr mk m = lin [] where

  lin ts t@(Tr (n,xs)) = errIn ("linearization of" +++ prt t) $ do

    let binds = A.bindsNode n
        at    = A.atomNode n
        fmk   = markSubtree mk n ts (A.isFocusNode n)
    c   <- A.val2cat $ A.valNode n
    xs' <- mapM (\ (i,x) -> lin (i:ts) x) $ zip [0..] xs

    r <- case at of
      A.AtC f -> lookf c t f >>= comp xs'
      A.AtI i -> return $ recInt i
      A.AtL s -> return $ recS $ tK $ prt at
      A.AtF i -> return $ recS $ tK $ prt at
      A.AtV x -> lookCat c >>= comp [tK (prt_ at)]
      A.AtM m -> lookCat c >>= comp [tK (prt_ at)] 

    r' <- case r of     -- to see stg in case the result is variants {}
      FV [] -> lookCat c >>= comp [tK (prt_ t)]
      _ -> return r

    return $ fmk $ mkBinds binds r'

  look = lookupLin gr . redirectIdent m . rtQIdent
  comp = ccompute gr
  mkBinds bs bdy = case bdy of
    R fs  -> R $ [Ass (LV i) (tK (prt t)) | (i,(t,_)) <- zip [0..] bs] ++ fs
    FV rs -> FV $ map (mkBinds bs) rs

  recS t = R [Ass (L (identC "s")) t] ----

  recInt i = R [
              Ass (L (identC "last")) (EInt (rem i 10)),
              Ass (L (identC "s"))  (tK $ show i),
              Ass (L (identC "size")) (EInt (if i > 9 then 1 else 0))
              ]

  lookCat = return . errVal defLindef . look 
         ---- should always be given in the module

  -- to show missing linearization as term
  lookf c t f = case look f of
    Ok h -> return h
    _ -> lookCat c >>= comp [tK (prt_ t)]


-- | thus the special case:
linearizeNoMark :: CanonGrammar -> Ident -> A.Tree -> Err Term
linearizeNoMark gr = linearizeToRecord gr noMark

-- | expand tables in linearized term to full, normal-order tables
--
-- NB expand from inside-out so that values are not looked up in copies of branches

expandLinTables :: CanonGrammar -> Term -> Err Term
expandLinTables gr t = case t of
  R rs -> liftM (R . map (uncurry Ass)) $ mapPairsM exp [(l,r) | Ass l r <- rs]
  T ty rs -> do
    rs' <- mapPairsM exp [(l,r) | Cas l r <- rs] -- expand from inside-out
    let t' = T ty $ map (uncurry Cas) rs'
    vs  <- alls ty
    ps  <- mapM term2patt vs
    ts' <- mapM (comp . S t') $ vs
    return $ T ty [Cas [p] t | (p,t) <- zip ps ts']
  V ty ts0 -> do
    ts  <- mapM exp ts0 -- expand from inside-out
    vs  <- alls ty
    ps  <- mapM term2patt vs
    return $ T ty [Cas [p] t | (p,t) <- zip ps ts]
  FV ts -> liftM FV $ mapM exp ts
  _ -> composOp exp t
 where
   alls = allParamValues gr
   exp  = expandLinTables gr
   comp = ccompute gr []

-- Do this for an entire grammar:

unoptimizeCanon :: CanonGrammar -> CanonGrammar
unoptimizeCanon g@(M.MGrammar ms) = M.MGrammar $ map (unoptimizeCanonMod g) ms 

unoptimizeCanonMod :: CanonGrammar -> CanonModule -> CanonModule
unoptimizeCanonMod g = convMod where
  convMod (m, M.ModMod (M.Module (M.MTConcrete a) x flags me os defs)) = 
    (m, M.ModMod (M.Module (M.MTConcrete a) x flags me os (mapTree convDef defs)))
  convMod mm = mm
  convDef (c,CncCat ty df pr) = (c,CncCat ty (convT df) (convT pr))
  convDef (f,CncFun c xs li pr) = (f,CncFun c xs (convT li) (convT pr))
  convDef cd = cd
  convT = err error id . exp
  -- a version of expandLinTables that does not destroy share optimization
  exp t = case t of
    R rs -> liftM (R . map (uncurry Ass)) $ mapPairsM exp [(l,r) | Ass l r <- rs]
    T ty rs@[Cas [_] _] -> do
      rs' <- mapPairsM exp [(l,r) | Cas l r <- rs] -- expand from inside-out
      let t' = T ty $ map (uncurry Cas) rs'
      vs  <- alls ty
      ps  <- mapM term2patt vs
      ts' <- mapM (comp . S t') $ vs
      return $ T ty [Cas [p] t | (p,t) <- zip ps ts']
    V ty ts0 -> do
      ts  <- mapM exp ts0 -- expand from inside-out
      vs  <- alls ty
      ps  <- mapM term2patt vs
      return $ T ty [Cas [p] t | (p,t) <- zip ps ts]
    FV ts -> liftM FV $ mapM exp ts
    I _   -> comp t
    _ -> composOp exp t
   where
     alls = allParamValues g
     comp = ccompute g []


-- | from records, one can get to records of tables of strings
rec2strTables :: Term -> Err [[(Label,[([Patt],[Str])])]]
rec2strTables r = do
  vs <- allLinValues r
  mapM (mapPairsM (mapPairsM strsFromTerm)) vs

-- | from these tables, one may want to extract the ones for the "s" label
strTables2sTables :: [[(Label,[([Patt],[Str])])]] -> [[([Patt],[Str])]]
strTables2sTables ts = [t | r <- ts, (l,t) <- r, l == linLab0]

linLab0 :: Label
linLab0 = L (identC "s")

-- | to get lists of token lists is easy
sTables2strs :: [[([Patt],[Str])]] -> [[Str]]
sTables2strs = map snd . concat

-- | from this, to get a list of strings
strs2strings :: [[Str]] -> [String]
strs2strings = map unlex

-- | this is just unwords; use an unlexer from Text to postprocess
unlex :: [Str] -> String
unlex = concat . map sstr . take 1 ----

-- | finally, a top-level function to get a string from an expression
linTree2string :: Marker -> CanonGrammar -> Ident -> A.Tree -> String
linTree2string mk gr m e = head $ linTree2strings mk gr m e -- never empty

-- | you can also get many strings
linTree2strings :: Marker -> CanonGrammar -> Ident -> A.Tree -> [String]
linTree2strings mk gr m e = err return id $ do
  t  <- linearizeToRecord gr mk m e
  r  <- expandLinTables gr t
  ts <- rec2strTables r
  let ss = strs2strings $ sTables2strs $ strTables2sTables ts
  ifNull (prtBad "empty linearization of" e) return ss   -- thus never empty

-- | argument is a Tree, value is a list of strs; needed in Parsing
allLinsOfTree :: CanonGrammar -> Ident -> A.Tree -> [Str]
allLinsOfTree gr a e = err (singleton . str) id $ do
  e' <- return e ---- annotateExp gr e
  r  <- linearizeNoMark gr a e'
  r' <- expandLinTables gr r
  ts <- rec2strTables r'
  return $ concat $ sTables2strs $ strTables2sTables ts

-- | the value is a list of structures arranged as records of tables of terms
allLinsAsRec :: CanonGrammar -> Ident -> A.Tree -> Err [[(Label,[([Patt],Term)])]]
allLinsAsRec gr c t = linearizeNoMark gr c t >>= expandLinTables gr >>= allLinValues

-- | the value is a list of structures arranged as records of tables of strings
-- only taking into account string fields
-- True: sep. by /, False: sep by \n
allLinTables :: 
  Bool -> CanonGrammar ->Ident ->A.Tree ->Err [[(Label,[([Patt],[String])])]]
allLinTables slash gr c t = do
  r' <- allLinsAsRec gr c t
  mapM (mapM getS) r'
 where 
   getS (lab,pss) = liftM (curry id lab) $ mapM gets pss
   gets (ps,t) = liftM (curry id ps . cc . map str2strings) $ strsFromTerm t
   cc = concat . intersperse [if slash then "/" else "\n"]

-- | the value is a list of strings gathered from all fields

allLinBranchFields :: CanonGrammar -> Ident -> A.Tree -> Err [String]
allLinBranchFields gr c trm = do
  r <- linearizeNoMark gr c trm >>= expandLinTables gr
  return [s | (_,t) <- allLinBranches r, s <- gets t]
 where 
   gets t = concat [cc (map str2strings s) | Ok s <- [strsFromTerm t]]
   cc = concat . intersperse ["/"]

prLinTable :: Bool -> [[(Label,[([Patt],[String])])]] -> [String]
prLinTable pars = concatMap prOne . concat where
  prOne (lab,pss) = (if pars then ((prt lab) :) else id) (map pr pss) ----
  pr (ps,ss) = (if pars then ((unwords (map prt_ ps) +++ ":") +++)
                             else id) (unwords ss)

{-
-- the value is a list of strs
allLinStrings :: CanonGrammar -> Tree -> [Str]
allLinStrings gr ft = case allLinsAsStrs gr ft of
  Ok ts -> map snd $ concat $ map snd $ concat ts
  Bad s -> [str s]

-- the value is a list of strs, not forgetting their arguments
allLinsAsStrs :: CanonGrammar -> Tree -> Err [[(Label,[([Patt],Str)])]]
allLinsAsStrs gr ft = do
  lpts <- allLinearizations gr ft
  return $ concat $ mapM (mapPairsM (mapPairsM strsFromTerm)) lpts


-- to a list of strings
linearizeToStrings :: CanonGrammar -> ([Int] ->Term -> Term) -> Tree -> Err [String]
linearizeToStrings gr mk = liftM (map unlex) . linearizeToStrss gr mk

-- to a list of token lists
linearizeToStrss :: CanonGrammar -> ([Int] -> Term -> Term) -> Tree -> Err [[Str]]
linearizeToStrss gr mk e = do
  R rs <- linearizeToRecord gr mk e ----
  t <- lookupErr linLab0 [(r,s) | Ass r s <- rs]
  return $ map strsFromTerm $ allInTable t 
-}

-- | the value is a list of strings, not forgetting their arguments
allLinsOfFun :: CanonGrammar -> CIdent -> Err [[(Label,[([Patt],Term)])]]
allLinsOfFun gr f = do
  t <- lookupLin gr f 
  allAllLinValues t --- all fields, not only s. 11/12/2005


-- | returns printname if one exists; otherwise linearizes with metas
printOrLinearize :: CanonGrammar -> Ident -> A.Fun -> String
printOrLinearize gr c f@(m, d) = errVal (prt fq) $ 
  case lookupPrintname gr (CIQ c d) of
    Ok t -> do
      ss <- strsFromTerm t
      let s = strs2strings [ss]
      return $ ifNull (prt fq) head s
    _ -> do
      ty  <- lookupFunType gr m d
      f'  <- ref2exp [] ty (A.QC m d)
      tr  <- annotate gr f' 
      return $ linTree2string noMark gr c tr
   where
     fq = CIQ m d