----------------------------------------------------------------------
-- |
-- Module      : CFIdent
-- Maintainer  : AR
-- Stability   : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/11/14 16:03:40 $ 
-- > CVS $Author: aarne $
-- > CVS $Revision: 1.13 $
--
-- symbols (categories, functions) for context-free grammars.
-----------------------------------------------------------------------------

module GF.CF.CFIdent (-- * Tokens and categories
		CFTok(..), CFCat(..),
		tS, tC, tL, tI, tF, tV, tM, tInt,
		prCFTok,
		-- * Function names and profiles
		CFFun(..), Profile,
		wordsCFTok,
		-- * CF Functions
		mkCFFun, varCFFun, consCFFun, string2CFFun, stringCFFun, 
                intCFFun, floatCFFun, dummyCFFun,
		cfFun2String, cfFun2Ident, cfFun2Profile, metaCFFun,
		-- * CF Categories
		mkCIdent, ident2CFCat, labels2CFCat, string2CFCat, 
                catVarCF, cat2CFCat, cfCatString, cfCatInt,cfCatFloat,
		moduleOfCFCat, cfCat2Cat, cfCat2Ident, lexCFCat,
		-- * CF Tokens
		string2CFTok, str2cftoks,
		-- * Comparisons
		compatToks, compatTok, compatCFFun, compatCF,
                wordsLits
	       ) where

import GF.Data.Operations
import GF.Canon.GFC
import GF.Infra.Ident
import GF.Grammar.Values (cPredefAbs)
import GF.Canon.AbsGFC
import GF.Grammar.Macros (ident2label)
import GF.Grammar.PrGrammar
import GF.Data.Str
import Data.Char (toLower, toUpper, isSpace)
import Data.List (intersperse)

-- | this type should be abstract
data CFTok = 
   TS String     -- ^ normal strings
 | TC String     -- ^ strings that are ambiguous between upper or lower case
 | TL String     -- ^ string literals
 | TI Integer    -- ^ integer literals
 | TF Double     -- ^ float literals
 | TV Ident      -- ^ variables
 | TM Int String -- ^ metavariables; the integer identifies it
  deriving (Eq, Ord, Show)

-- | this type should be abstract
newtype CFCat = CFCat (CIdent,Label) deriving (Eq, Ord, Show)

tS :: String -> CFTok
tC :: String -> CFTok
tL :: String -> CFTok
tI :: String -> CFTok
tF :: String -> CFTok
tV :: String -> CFTok
tM :: String -> CFTok

tS  = TS
tC  = TC
tL  = TL
tI  = TI . read 
tF  = TF . read 
tV  = TV . identC
tM  = TM 0

tInt :: Integer -> CFTok
tInt = TI

prCFTok :: CFTok -> String
prCFTok t = case t of
  TS s -> s
  TC s -> s
  TL s -> s
  TI i -> show i
  TF i -> show i
  TV x -> prt x
  TM i m -> m --- "?" --- m

-- | to build trees: the Atom contains a GF function, @Cn | Meta | Vr | Literal@
newtype CFFun = CFFun (Atom, Profile) deriving (Eq,Ord,Show) 
-- - - - - - - - - - - - - - - - - - - - -         ^^^ added by peb, 21/5-04 

type Profile  = [([[Int]],[Int])]

wordsCFTok :: CFTok -> [String]
wordsCFTok t = case t of
  TC (c:cs) -> [c':cs | c' <- [toUpper c, toLower c]]
  _ -> [prCFTok t]

-- the following functions should be used instead of constructors

-- to construct CF functions

mkCFFun :: Atom -> CFFun
mkCFFun t = CFFun (t,[])

varCFFun :: Ident -> CFFun
varCFFun = mkCFFun . AV

consCFFun :: CIdent -> CFFun
consCFFun = mkCFFun . AC

-- | standard way of making cf fun
string2CFFun :: String -> String -> CFFun
string2CFFun m c = consCFFun $ mkCIdent m c

stringCFFun :: String -> CFFun 
stringCFFun = mkCFFun . AS

intCFFun :: Integer -> CFFun 
intCFFun = mkCFFun . AI

floatCFFun :: Double -> CFFun 
floatCFFun = mkCFFun . AF

-- | used in lexer-by-need rules
dummyCFFun :: CFFun
dummyCFFun = varCFFun $ identC "_"

cfFun2String :: CFFun -> String
cfFun2String (CFFun (f,_)) = prt f

cfFun2Ident :: CFFun -> Ident
cfFun2Ident (CFFun (f,_)) = identC $ prt_ f ---

cfFun2Profile :: CFFun -> Profile
cfFun2Profile (CFFun (_,p)) = p

{- ----
strPro2cfFun :: String -> Profile -> CFFun
strPro2cfFun str p = (CFFun (AC (Ident str), p))
-}

metaCFFun :: CFFun
metaCFFun = mkCFFun $ AM 0

-- to construct CF categories

-- | belongs elsewhere
mkCIdent :: String -> String -> CIdent
mkCIdent m c = CIQ (identC m) (identC c)

ident2CFCat :: CIdent -> Ident -> CFCat
ident2CFCat mc d = CFCat (mc, L d)

labels2CFCat :: CIdent -> [Label] -> CFCat
labels2CFCat mc d = CFCat (mc, L (identC (concat (intersperse "." (map prt d))))) ----

-- | standard way of making cf cat: label s
string2CFCat :: String -> String -> CFCat
string2CFCat m c = ident2CFCat (mkCIdent m c) (identC "s")

idents2CFCat :: Ident -> Ident -> CFCat
idents2CFCat m c = ident2CFCat (CIQ m c) (identC "s")

catVarCF :: CFCat
catVarCF = ident2CFCat (mkCIdent "_" "#Var") (identC "_") ----

cat2CFCat :: (Ident,Ident) -> CFCat
cat2CFCat = uncurry idents2CFCat

-- | literals
cfCatString :: CFCat
cfCatString = string2CFCat (prt cPredefAbs) "String"

cfCatInt, cfCatFloat :: CFCat
cfCatInt = string2CFCat (prt cPredefAbs) "Int"
cfCatFloat = string2CFCat (prt cPredefAbs) "Float"


{- ----
uCFCat :: CFCat
uCFCat = cat2CFCat uCat
-}

moduleOfCFCat :: CFCat -> Ident
moduleOfCFCat (CFCat (CIQ m _, _)) = m

-- | the opposite direction
cfCat2Cat :: CFCat -> (Ident,Ident)
cfCat2Cat (CFCat (CIQ m c,_)) = (m,c)

cfCat2Ident :: CFCat -> Ident
cfCat2Ident = snd . cfCat2Cat

lexCFCat :: CFCat -> CFCat
lexCFCat cat = ident2CFCat (uncurry CIQ (cfCat2Cat cat)) (identC "*")

-- to construct CF tokens

string2CFTok :: String -> CFTok
string2CFTok = tS

str2cftoks :: Str -> [CFTok]
str2cftoks = map tS . wordsLits . sstr

-- decide if two token lists look the same (in parser postprocessing)

compatToks :: [CFTok] -> [CFTok] -> Bool
compatToks ts us = and [compatTok t u | (t,u) <- zip ts us]

compatTok :: CFTok -> CFTok -> Bool
compatTok (TM _ _) _ = True --- hack because metas are renamed
compatTok _ (TM _ _) = True
compatTok t u = any (`elem` (alts t)) (alts u) where
  alts u = case u of
    TC (c:s) -> [toLower c : s, toUpper c : s]
    TL s -> [s, prQuotedString s]
    _ -> [prCFTok u]

-- | decide if two CFFuns have the same function head (profiles may differ)
compatCFFun :: CFFun -> CFFun -> Bool
compatCFFun (CFFun (f,_)) (CFFun (g,_)) = f == g

-- | decide whether two categories match
-- the modifiers can be from different modules, but on the same extension
-- path, so there is no clash, and they can be safely ignored ---
compatCF :: CFCat -> CFCat -> Bool
----compatCF = (==)
compatCF (CFCat (CIQ _ c, l)) (CFCat (CIQ _ c', l')) = c==c' && l==l'

-- | Like 'words', but does not split on whitespace inside
--   double quotes.wordsLits :: String -> [String]
-- Also treats escaped quotes in quotes (AR 21/12/2005) by breaks
-- instead of break
wordsLits [] = []
wordsLits (c:cs) | isSpace c = wordsLits (dropWhile isSpace cs)
		 | isQuote c
		     = let (l,rs) = breaks (==c) cs
			   rs' = drop 1 rs
			   in ([c]++l++[c]):wordsLits rs'
		 | otherwise = let (w,rs) = break isSpaceQ cs
				   in (c:w):wordsLits rs
  where
    breaks c cs = case break c cs of
      (l@(_:_),d:rs) | last l == '\\' -> 
        let (r,ts) = breaks c rs in (l++[d]++r, ts)
      v -> v
    isQuote c = elem c "\"'" 
    isSpaceQ c = isSpace c ---- || isQuote c