gf-core/src/GF/Canon/CanonToGFCC.hs

----------------------------------------------------------------------
-- |
-- Module      : CanonToGFCC
-- Maintainer  : AR
-- Stability   : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/06/17 14:15:17 $
-- > CVS $Author: aarne $
-- > CVS $Revision: 1.15 $
--
-- a decompiler. AR 12/6/2003 -- 19/4/2004
-----------------------------------------------------------------------------

module GF.Canon.CanonToGFCC (prCanon2gfcc) where

import GF.Canon.AbsGFC
import qualified GF.Canon.GFC as GFC
import qualified GF.Canon.Look as Look
import qualified GF.Canon.Subexpressions as Sub
import qualified GF.Canon.GFCC.AbsGFCC as C
import qualified GF.Canon.GFCC.PrintGFCC as Pr
import GF.Canon.GFC
import GF.Canon.Share
import qualified GF.Grammar.Abstract as A
import qualified GF.Grammar.Macros as GM
import GF.Canon.MkGFC
import GF.Canon.CMacros
import qualified GF.Infra.Modules as M
import qualified GF.Infra.Option as O
import GF.UseGrammar.Linear (expandLinTables, unoptimizeCanon)

import GF.Infra.Ident
import GF.Data.Operations
import GF.Text.UTF8

import Data.List
import qualified Data.Map as Map
import Debug.Trace ----

-- the main function: generate GFCC from GFCM.

prCanon2gfcc :: CanonGrammar -> String
prCanon2gfcc =
  Pr.printTree . canon2gfcc . reorder . utf8Conv . canon2canon . normalize

-- This is needed to reorganize the grammar. GFCC has its own back-end optimization.
-- But we need to have the canonical order in tables, created by valOpt
normalize :: CanonGrammar -> CanonGrammar
normalize = share . unoptimizeCanon . Sub.unSubelimCanon where
  share = M.MGrammar . map (shareModule valOpt) . M.modules --- allOpt

-- Generate GFCC from GFCM.
-- this assumes a grammar translated by canon2canon

canon2gfcc :: CanonGrammar -> C.Grammar
canon2gfcc cgr@(M.MGrammar ((a,M.ModMod abm):cms)) =
     C.Grm (C.Hdr (i2i a) cs) (C.Abs adefs) cncs where
  cs  = map (i2i . fst) cms
  adefs = [C.Fun f' (mkType ty) (C.Tr (C.AC f') []) |
            (f,GFC.AbsFun ty _) <- tree2list (M.jments abm), let f' = i2i f]
  cncs  = [C.Cnc (i2i lang) (concr m) | (lang,M.ModMod m) <- cms]
  concr mo = optConcrete
               [C.Lin (i2i f) (mkTerm tr) |
                 (f,GFC.CncFun _ _ tr _) <- tree2list (M.jments mo)]

i2i :: Ident -> C.CId
i2i (IC c) = C.CId c

mkType :: A.Type -> C.Type
mkType t = case GM.catSkeleton t of
  Ok (cs,c) -> C.Typ (map (i2i . snd) cs) (i2i $ snd c)

mkTerm :: Term -> C.Term
mkTerm tr = case tr of
  Arg (A _ i) -> C.V i
  EInt i      -> C.C i
  R rs     -> C.R [mkTerm t | Ass _ t <- rs]
  P t l    -> C.P (mkTerm t) (C.C (mkLab l))
  T _ [Cas [PV (IC x)] t] -> C.A (C.CId x) (mkTerm t) -- abstraction
  T _ cs   -> C.R [mkTerm t | Cas _ t <- cs] --- should not appear after values opt
  V _ cs   -> C.R [mkTerm t | t <- cs]
  S t p    -> C.P (mkTerm t) (mkTerm p)
  C s t    -> C.S [mkTerm x | x <- [s,t]]
  LI(IC x) -> C.L (C.CId x)
  FV ts    -> C.FV [mkTerm t | t <- ts]
  K (KS s) -> C.K (C.KS s)
  K (KP ss _) -> C.K (C.KP ss []) ---- TODO: prefix variants
  E -> C.S []
  Par _ _  -> prtTrace tr $ C.C 66661            ---- just for debugging
  _ -> C.S [C.K (C.KS (A.prt tr +++ "66662"))] ---- just for debugging
 where
   mkLab (L (IC l)) = case l of
     '_':ds -> (read ds) :: Integer
     _ -> prtTrace tr $ 66663

-- return just one module per language

reorder :: CanonGrammar -> CanonGrammar
reorder cg = M.MGrammar $
    (abs, M.ModMod $
          M.Module M.MTAbstract M.MSComplete [] [] [] adefs):
      [(c, M.ModMod $
          M.Module (M.MTConcrete abs) M.MSComplete [] [] [] (sorted2tree js))
            | (c,js) <- cncs]
     where
       abs = maybe (error "no abstract") id $ M.greatestAbstract cg
       mos = M.allModMod cg
       adefs =
         sorted2tree $ sortBy (\ (f,_) (g,_) -> compare f g)
            [finfo |
              (i,mo) <- M.allModMod cg, M.isModAbs mo,
              finfo <- tree2list (M.jments mo)]
       cncs = sortBy (\ (x,_) (y,_) -> compare x y)
            [(lang, concr lang) | lang <- M.allConcretes cg abs]
       concr la = sortBy (\ (f,_) (g,_) -> compare f g)
            [finfo |
              (i,mo) <- mos, M.isModCnc mo, elem i (M.allExtends cg la),
              finfo <- tree2list (M.jments mo)]

-- convert to UTF8 if not yet converted
utf8Conv :: CanonGrammar -> CanonGrammar
utf8Conv = M.MGrammar . map toUTF8 . M.modules where
  toUTF8 mo = case mo of
    (i, M.ModMod m)
      | hasFlagCanon (flagCanon "coding" "utf8") mo -> mo
      | otherwise -> (i, M.ModMod $
          m{ M.jments =
              mapTree (onSnd (mapInfoTerms (onTokens encodeUTF8))) (M.jments m),
	     M.flags = setFlag "coding" "utf8" (M.flags m) }
          )
    _ -> mo


-- translate tables and records to arrays, parameters and labels to indices

canon2canon :: CanonGrammar -> CanonGrammar
canon2canon cg = tr $ M.MGrammar $ map c2c $ M.modules cg where
  c2c (c,m) = case m of
    M.ModMod mo@(M.Module _ _ _ _ _ js) ->
      (c, M.ModMod $ M.replaceJudgements mo $ mapTree j2j js)
    _ -> (c,m)
  j2j (f,j) = case j of
    GFC.CncFun x y tr z -> (f,GFC.CncFun x y (t2t tr) z)
    _ -> (f,j)
  t2t = term2term cg pv
  pv@(labels,untyps,typs) = paramValues cg
  tr = trace $
   (unlines [A.prt c ++ "." ++ unwords (map A.prt l) +++ "=" +++ show i  |
       ((c,l),i) <- Map.toList labels]) ++
   (unlines [A.prt t +++ "=" +++ show i  |
       (t,i) <- Map.toList untyps]) ++
   (unlines [A.prt t |
       (t,_) <- Map.toList typs])

type ParamEnv =
  (Map.Map (Ident,[Label]) (CType,Integer), -- numbered labels
   Map.Map Term Integer,                    -- untyped terms to values
   Map.Map CType (Map.Map Term Integer))    -- types to their terms to values

--- gathers those param types that are actually used in lincats and in lin terms
paramValues :: CanonGrammar -> ParamEnv
paramValues cgr = (labels,untyps,typs) where
  params = [(ty, errVal [] $ Look.allParamValues cgr ty) | ty <- partyps]
  partyps = nub $ [ty |
              (_,(_,CncCat (RecType ls) _ _)) <- jments,
              ty0 <- [ty | Lbg _ ty <- ls],
              ty  <- typsFrom ty0
            ] ++ [
             Cn (CIQ m ty) |
              (m,(ty,ResPar _)) <- jments
            ] ++ [ty |
              (_,(_,CncFun _ _ tr _)) <- jments,
              ty  <- err (const []) snd $ appSTM (typsFromTrm tr) []
            ]
  typsFrom ty = case ty of
    Table p t  -> typsFrom p ++ typsFrom t
    RecType ls -> ty : concat [typsFrom t | Lbg _ t <- ls]
    _ -> [ty]

  typsFromTrm :: Term -> STM [CType] Term
  typsFromTrm tr = case tr of
    V ty ts -> updateSTM (ty:) >> mapM_ typsFromTrm ts >> return tr
    T ty cs -> updateSTM (ty:) >> mapM_ typsFromTrm [t | Cas _ t <- cs] >> return tr
    _ -> composOp typsFromTrm tr


  jments = [(m,j) | (m,mo) <- M.allModMod cgr, j <- tree2list $ M.jments mo]
  typs = Map.fromList [(ci,Map.fromList (zip vs [0..])) | (ci,vs) <- params]
  untyps = Map.fromList $ concatMap Map.toList [typ | (_,typ) <- Map.toList typs]
  lincats =
    [(IC cat,[Lbg (L (IC "s")) TStr]) | cat <- ["Int", "Float", "String"]] ++
    [(cat,ls) | (_,(cat,CncCat (RecType ls) _ _)) <- jments]
  labels = Map.fromList $ concat
    [((cat,[lab]),(typ,i)):
      [((cat,[lab,lab2]),(ty,j)) |
        rs <- getRec typ, (Lbg lab2 ty,j) <- zip rs [0..]]
      |
        (cat,ls) <- lincats, (Lbg lab typ,i) <- zip ls [0..]]
    -- go to tables recursively
    ---- TODO: even go to deeper records
   where
     getRec typ = case typ of
       RecType rs -> [rs]
       Table _ t  -> getRec t
       _ -> []

term2term :: CanonGrammar -> ParamEnv -> Term -> Term
term2term cgr env@(labels,untyps,typs) tr = case tr of
  Par _ _ -> mkValCase tr
  R rs ->
    let
      rs' = [Ass (mkLab i) (t2t t) | (i,Ass l t) <- zip [0..] rs, not (isLock l t)]
    in if (any (isStr . trmAss) rs)
      then R rs'
      else R [Ass (mkLab 0) (mkValCase tr), Ass (mkLab 1) (R rs')]
  P t l    -> r2r tr
  T i [Cas p t] -> T i [Cas p (t2t t)]
  T _ _    -> case expandLinTables cgr tr of  -- to normalize the order of cases
    Ok (T ty cs) -> V ty [t2t t | Cas _ t <- cs]
    _ -> K (KS (A.prt tr +++ prtTrace tr "66668"))
  V ty ts  -> V ty [t2t t | t <- ts]
  S t p    -> S (t2t t) (t2t p)
  _ -> composSafeOp t2t tr
 where
   t2t = term2term cgr env

   r2r tr@(P p _) = case getLab tr of
     Ok (cat,labs) -> P (t2t p) . mkLab $ maybe (prtTrace tr $ 66664) snd $
          Map.lookup (cat,labs) labels
     _ -> K (KS (A.prt tr +++ prtTrace tr "66665"))

   -- this goes recursively into tables (ignored) and records (accumulated)
   getLab tr = case tr of
     Arg (A cat _) -> return (cat,[])
     P p lab2 -> do
       (cat,labs) <- getLab p
       return (cat,labs++[lab2])
     S p _ -> getLab p
     _ -> Bad "getLab"

   doVar :: Term -> STM [((CType,[Term]),(Term,Term))] Term
   doVar tr = case getLab tr of
     Ok (cat, lab) -> do
       k <- readSTM >>= return . length
       let tr' = LI $ identC $ show k
       let tyvs = case Map.lookup (cat,lab) labels of
             Just (ty,_) -> case Map.lookup ty typs of
               Just vs -> (ty,[t |
                            (t,_) <- sortBy (\x y -> compare (snd x) (snd y)) (Map.assocs vs)])
               _ -> error $ A.prt ty
             _ -> error $ A.prt tr
       updateSTM ((tyvs, (tr', tr)):)
       return tr'
     _ -> composOp doVar tr

   mkValCase tr = case appSTM (doVar tr) [] of
     Ok (tr', st@(_:_)) -> t2t $ comp $ foldr mkCase tr' st
     _ -> valNum tr

   mkCase ((ty,vs),(x,p)) tr =
     S (V ty [mkBranch x v tr | v <- vs]) p
   mkBranch x t tr = case tr of
     _ | tr == x -> t
     _ -> composSafeOp (mkBranch x t) tr

   mkLab k = L (IC ("_" ++ show k))
   valNum tr = maybe (tryPerm tr) EInt $ Map.lookup tr untyps
    --- a hack needed because GFCC does not guarantee canonical order of param records
    where
      tryPerm tr = case tr of
        R rs -> case [v | Just v <- [Map.lookup (R rs') untyps | rs' <- permutations rs]] of
	  v:_ -> EInt v
          _ -> valNumFV $ tryVar tr
        _ -> valNumFV $ tryVar tr
      tryVar tr = case tr of
        Par c ts -> [Par c ts' | ts' <- combinations (map tryVar ts)]
        FV ts -> ts
        _ -> [tr]
      valNumFV ts = case ts of
        [tr] -> K (KS (A.prt tr +++ prtTrace tr "66667"))
        _ -> FV $ map valNum ts
      permutations xx = case xx of
        [] -> [[]]
        _  -> [x:xs | x <- xx, xs <- permutations (xx \\ [x])]
   isStr tr = case tr of
     Par _ _ -> False
     EInt _  -> False
     R rs    -> any (isStr . trmAss) rs
     FV ts   -> any isStr ts
     P t r   -> True      ---- TODO
     _ -> True
   isLock l t = case t of --- need not look at l
     R [] -> True
     _ -> False
   trmAss (Ass _ t) = t

   --- this is mainly needed for parameter record projections
   comp t = errVal t $ Look.ccompute cgr [] t


prtTrace tr n = n ----trace ("-- ERROR" +++ A.prt tr +++ show n +++ show tr) n
prTrace  tr n = trace ("-- OBSERVE" +++ A.prt tr +++ show n +++ show tr) n

-- back-end optimization:
-- suffix analysis followed by common subexpression elimination

optConcrete :: [C.CncDef] -> [C.CncDef]
optConcrete defs = subex [C.Lin f (optTerm t) | C.Lin f t <- defs]

-- analyse word form lists into prefix + suffixes
-- suffix sets can later be shared by subex elim

optTerm :: C.Term -> C.Term
optTerm tr = case tr of
    C.R ts@(_:_:_) | all isK ts -> mkSuff $ optToks [s | C.K (C.KS s) <- ts]
    C.R ts  -> C.R $ map optTerm ts
    C.P t v -> C.P (optTerm t) v
    C.A x t -> C.A x (optTerm t)
    _ -> tr
 where
  optToks ss = prf : suffs where
    prf = pref (head ss) (tail ss)
    suffs = map (drop (length prf)) ss
    pref cand ss = case ss of
      s1:ss2 -> if isPrefixOf cand s1 then pref cand ss2 else pref (init cand) ss
      _ -> cand
  isK t = case t of
    C.K (C.KS _) -> True
    _ -> False
  mkSuff ("":ws) = C.R (map (C.K . C.KS) ws)
  mkSuff (p:ws) = C.W p (C.R (map (C.K . C.KS) ws))


-- common subexpression elimination; see ./Subexpression.hs for the idea

subex :: [C.CncDef] -> [C.CncDef]
subex js = errVal js $ do
  (tree,_) <- appSTM (getSubtermsMod js) (Map.empty,0)
  return $ addSubexpConsts tree js

type TermList = Map.Map C.Term (Int,Int) -- number of occs, id
type TermM a = STM (TermList,Int) a

addSubexpConsts :: TermList -> [C.CncDef] -> [C.CncDef]
addSubexpConsts tree lins =
  let opers = sortBy (\ (C.Lin f _) (C.Lin g _) -> compare f g)
                [C.Lin (fid id) trm | (trm,(_,id)) <- list]
  in map mkOne $ opers ++ lins
 where
   mkOne (C.Lin f trm) = (C.Lin f (recomp f trm))
   recomp f t = case Map.lookup t tree of
     Just (_,id) | fid id /= f -> C.F $ fid id -- not to replace oper itself
     _ -> case t of
       C.R ts  -> C.R $ map (recomp f) ts
       C.S ts  -> C.S $ map (recomp f) ts
       C.W s t -> C.W s (recomp f t)
       C.P t p -> C.P (recomp f t) (recomp f p)
       C.A x t -> C.A x (recomp f t)
       _ -> t
   fid n = C.CId $ "_" ++ show n
   list = Map.toList tree

getSubtermsMod :: [C.CncDef] -> TermM TermList
getSubtermsMod js = do
  mapM (getInfo collectSubterms) js
  (tree0,_) <- readSTM
  return $ Map.filter (\ (nu,_) -> nu > 1) tree0
 where
   getInfo get (C.Lin f trm) = do
     get trm
     return ()

collectSubterms :: C.Term -> TermM ()
collectSubterms t = case t of
  C.R ts -> do
    mapM collectSubterms ts
    add t
  C.S ts -> do
    mapM collectSubterms ts
    add t
  C.A x b -> do
    collectSubterms b -- t itself can only occur once in a grammar
  C.W s u -> do
    collectSubterms u
    add t
  _ -> return ()
 where
   add t = do
     (ts,i) <- readSTM
     let
       ((count,id),next) = case Map.lookup t ts of
         Just (nu,id) -> ((nu+1,id), i)
         _ ->            ((1,   i ), i+1)
     writeSTM (Map.insert t (count,id) ts, next)