gf-core/src-3.0/GF/Formalism/Utilities.hs

----------------------------------------------------------------------
-- |
-- Maintainer  : PL
-- Stability   : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/05/13 12:40:19 $
-- > CVS $Author: peb $
-- > CVS $Revision: 1.6 $
--
-- Basic type declarations and functions for grammar formalisms
-----------------------------------------------------------------------------


module GF.Formalism.Utilities where

import Control.Monad
import Data.Array
import Data.List (groupBy)

import GF.Data.SortedList
import GF.Data.Assoc
import GF.Data.Utilities (sameLength, foldMerge, splitBy)

import GF.Infra.PrintClass

------------------------------------------------------------
-- * symbols

data Symbol c t = Cat c | Tok t
		  deriving (Eq, Ord, Show)

symbol :: (c -> a) -> (t -> a) -> Symbol c t -> a
symbol fc ft (Cat cat) = fc cat
symbol fc ft (Tok tok) = ft tok

mapSymbol :: (c -> d) -> (t -> u) -> Symbol c t -> Symbol d u
mapSymbol fc ft = symbol (Cat . fc) (Tok . ft)

filterCats :: [Symbol c t] -> [c]
filterCats syms = [ cat | Cat cat <- syms ]

filterToks :: [Symbol c t] -> [t]
filterToks syms = [ tok | Tok tok <- syms ]

------------------------------------------------------------
-- * edges

data Edge s = Edge Int Int s
	      deriving (Eq, Ord, Show)

instance Functor Edge where
    fmap f (Edge i j s) = Edge i j (f s)


------------------------------------------------------------
-- * representaions of input tokens

data Input t = MkInput { inputEdges  :: [Edge t],
			 inputBounds :: (Int, Int),
			 inputFrom   :: Array Int (Assoc t [Int]),
			 inputTo     :: Array Int (Assoc t [Int]),
			 inputToken  :: Assoc t [(Int, Int)]
		       }

makeInput :: Ord t => [Edge t] -> Input t
input     :: Ord t =>  [t]     -> Input t
inputMany :: Ord t => [[t]]    -> Input t

instance Show t => Show (Input t) where
    show input = "makeInput " ++ show (inputEdges input)

----------

makeInput inEdges  | null inEdges = input []
		   | otherwise    = MkInput inEdges inBounds inFrom inTo inToken
    where inBounds = foldr1 minmax [ (i, j) | Edge i j _ <- inEdges ]
	      where minmax (a, b) (a', b') = (min a a', max b b')
	  inFrom   = fmap (accumAssoc id) $ accumArray (<++>) [] inBounds $
		     [ (i, [(tok, j)]) | Edge i j tok <- inEdges ]
	  inTo     = fmap (accumAssoc id) $ accumArray (<++>) [] inBounds
		     [ (j, [(tok, i)]) | Edge i j tok <- inEdges ]
	  inToken  = accumAssoc id [ (tok, (i, j)) | Edge i j tok <- inEdges ]

input toks         = MkInput inEdges inBounds inFrom inTo inToken
    where inEdges  = zipWith3 Edge [0..] [1..] toks
	  inBounds = (0, length toks)
	  inFrom   = listArray inBounds $
		     [ listAssoc [(tok, [j])] | (tok, j) <- zip toks [1..] ] ++ [ listAssoc [] ]
	  inTo     = listArray inBounds $
		     [ listAssoc [] ] ++ [ listAssoc [(tok, [i])] | (tok, i) <- zip toks [0..] ]
	  inToken  = accumAssoc id [ (tok, (i, j)) | Edge i j tok <- inEdges ]

inputMany toks     = MkInput inEdges inBounds inFrom inTo inToken
    where inEdges  = [ Edge i j t | (i, j, ts) <- zip3 [0..] [1..] toks, t <- ts ]
	  inBounds = (0, length toks)
	  inFrom   = listArray inBounds $
		     [ listAssoc [ (t, [j]) | t <- nubsort ts ] | (ts, j) <- zip toks [1..] ]
		     ++ [ listAssoc [] ]
	  inTo     = listArray inBounds $
		     [ listAssoc [] ] ++
		     [ listAssoc [ (t, [i]) | t <- nubsort ts ] | (ts, i) <- zip toks [0..] ]
	  inToken  = accumAssoc id [ (tok, (i, j)) | Edge i j tok <- inEdges ]


------------------------------------------------------------
-- * representations of syntactical analyses

-- ** charts as finite maps over edges

-- | The values of the chart, a list of key-daughters pairs,
-- has unique keys. In essence, it is a map from 'n' to daughters.
-- The daughters should be a set (not necessarily sorted) of rhs's.
type SyntaxChart n e = Assoc e [SyntaxNode n [e]]

data SyntaxNode n e = SMeta
                    | SNode n [e]
                    | SString String
                    | SInt    Integer
                    | SFloat  Double
                    deriving (Eq,Ord)

groupSyntaxNodes :: Ord n => [SyntaxNode n e] -> [SyntaxNode n [e]]
groupSyntaxNodes []                =  []
groupSyntaxNodes (SNode n0 es0:xs) = (SNode n0 (es0:ess)) : groupSyntaxNodes xs'
  where
    (ess,xs') = span xs

    span []       = ([],[])
    span xs@(SNode n es:xs')
      | n0 == n   = let (ess,xs) = span xs' in (es:ess,xs)
      | otherwise = ([],xs)
groupSyntaxNodes (SString s:xs) = (SString s) : groupSyntaxNodes xs
groupSyntaxNodes (SInt    n:xs) = (SInt    n) : groupSyntaxNodes xs
groupSyntaxNodes (SFloat  f:xs) = (SFloat  f) : groupSyntaxNodes xs

-- better(?) representation of forests:
-- data Forest n = F (SMap n (SList [Forest n])) Bool
-- ==
-- type Forest n = GeneralTrie n (SList [Forest n]) Bool
-- (the Bool == isMeta)

-- ** syntax forests

data SyntaxForest n = FMeta
		    | FNode n [[SyntaxForest n]]
                      -- ^ The outer list should be a set (not necessarily sorted)
		      -- of possible alternatives. Ie. the outer list
		      -- is a disjunctive node, and the inner lists
		      -- are (conjunctive) concatenative nodes
		    | FString String
		    | FInt    Integer
		    | FFloat  Double
		      deriving (Eq, Ord, Show)

instance Functor SyntaxForest where
    fmap f (FNode n forests) = FNode (f n) $ map (map (fmap f)) forests
    fmap _ (FString s) = FString s
    fmap _ (FInt    n) = FInt    n
    fmap _ (FFloat  f) = FFloat  f
    fmap _ (FMeta)     = FMeta

forestName :: SyntaxForest n -> Maybe n
forestName (FNode n _) = Just n
forestName _           = Nothing

unifyManyForests :: (Monad m, Eq n) => [SyntaxForest n] -> m (SyntaxForest n)
unifyManyForests = foldM unifyForests FMeta

-- | two forests can be unified, if either is 'FMeta', or both have the same parent,
-- and all children can be unified
unifyForests :: (Monad m, Eq n) => SyntaxForest n -> SyntaxForest n -> m (SyntaxForest n)
unifyForests FMeta  forest = return forest
unifyForests forest FMeta  = return forest
unifyForests (FNode name1 children1) (FNode name2 children2)
    | name1 == name2 && not (null children) = return $ FNode name1 children
    where children = [ forests | forests1 <- children1, forests2 <- children2,
		       sameLength forests1 forests2,
		       forests <- zipWithM unifyForests forests1 forests2 ]
unifyForests (FString s1) (FString s2)
    | s1 == s2  = return $ FString s1
unifyForests (FInt n1) (FInt n2)
    | n1 == n2  = return $ FInt n1
unifyForests (FFloat f1) (FFloat f2)
    | f1 == f2  = return $ FFloat f1
unifyForests _ _ = fail "forest unification failure"

{- måste tänka mer på detta:
compactForests :: Ord n => [SyntaxForest n] -> SList (SyntaxForest n)
compactForests = map joinForests . groupBy eqNames . sortForests
    where eqNames f g    = forestName f == forestName g
	  sortForests    = foldMerge mergeForests [] . map return
	  mergeForests [] gs = gs
	  mergeForests fs [] = fs
	  mergeForests fs@(f:fs') gs@(g:gs')
	      = case forestName f `compare` forestName g of
		  LT ->     f : mergeForests fs' gs
		  GT ->     g : mergeForests fs  gs'
		  EQ -> f : g : mergeForests fs' gs'
	  joinForests fs = case forestName (head fs) of
			     Nothing   -> FMeta
			     Just name -> FNode name $
					  compactDaughters $
					  concat [ fss | FNode _ fss <- fs ]
	  compactDaughters fss = case head fss of
				   []  -> [[]]
				   [_] -> map return $ compactForests $ concat fss
				   _   -> nubsort fss
-}

-- ** syntax trees

data SyntaxTree n = TMeta
                  | TNode n [SyntaxTree n]
                  | TString  String
                  | TInt     Integer
                  | TFloat   Double
		  deriving (Eq, Ord, Show)

instance Functor SyntaxTree where
    fmap f (TNode n trees) = TNode (f n) $ map (fmap f) trees
    fmap _ (TString s)     = TString s
    fmap _ (TInt    n)     = TInt    n
    fmap _ (TFloat  f)     = TFloat  f
    fmap _ (TMeta)         = TMeta

treeName :: SyntaxTree n -> Maybe n
treeName (TNode n _) = Just n
treeName (TMeta)     = Nothing

unifyManyTrees :: (Monad m, Eq n) => [SyntaxTree n] -> m (SyntaxTree n)
unifyManyTrees = foldM unifyTrees TMeta

-- | two trees can be unified, if either is 'TMeta',
-- or both have the same parent, and their children can be unified
unifyTrees :: (Monad m, Eq n) => SyntaxTree n -> SyntaxTree n -> m (SyntaxTree n)
unifyTrees TMeta tree = return tree
unifyTrees tree TMeta = return tree
unifyTrees (TNode name1 children1) (TNode name2 children2)
    | name1 == name2 && sameLength children1 children2
	= liftM (TNode name1) $ zipWithM unifyTrees children1 children2
unifyTrees (TString s1) (TString s2)
    | s1 == s2 = return (TString s1)
unifyTrees (TInt n1) (TInt n2)
    | n1 == n2 = return (TInt n1)
unifyTrees (TFloat f1) (TFloat f2)
    | f1 == f2 = return (TFloat f1)
unifyTrees _ _ = fail "tree unification failure"

-- ** conversions between representations

chart2forests :: (Ord n, Ord e) =>
		 SyntaxChart n e  -- ^ The complete chart
	      -> (e -> Bool)      -- ^ When is an edge 'FMeta'?
	      -> [e]              -- ^ The starting edges
	      -> SList (SyntaxForest n) -- ^ The result has unique keys, ie. all 'n' are joined together.
					-- In essence, the result is a map from 'n' to forest daughters

-- simplest implementation

chart2forests chart isMeta = concatMap (edge2forests [])
    where edge2forests edges edge
              | isMeta edge       = [FMeta]
              | edge `elem` edges = []
              | otherwise         = map (item2forest (edge:edges)) $ chart ? edge
          item2forest edges (SMeta)               = FMeta
          item2forest edges (SNode name children) =
                                    FNode name $ children >>= mapM (edge2forests edges)
          item2forest edges (SString s)           = FString s
          item2forest edges (SInt    n)           = FInt    n
          item2forest edges (SFloat  f)           = FFloat  f

{- -before AR inserted peb's patch 8/7/2007, this was:

chart2forests chart isMeta  = concatMap edge2forests
    where edge2forests edge = if isMeta edge then [FMeta]
			      else map item2forest $ chart ? edge
	  item2forest (SMeta)               = FMeta
	  item2forest (SNode name children) = FNode name $ children >>= mapM edge2forests
	  item2forest (SString s)           = FString s
	  item2forest (SInt    n)           = FInt    n
	  item2forest (SFloat  f)           = FFloat  f

-}

{-
-- more intelligent(?) implementation,
-- requiring that charts and forests are sorted maps and sorted sets
chart2forests chart isMeta = es2fs
    where e2fs  e  = if isMeta e then [FMeta] else map i2f $ chart ? e
	  es2fs es = if null metas then fs else FMeta : fs
	      where (metas, nonMetas) = splitBy isMeta es
		    fs = map i2f $ unionMap (<++>) $ map (chart ?) nonMetas
	  i2f (name, children) = FNode name $
				 case head children of
				   []  -> [[]]
				   [_] -> map return $ es2fs $ concat children
				   _   -> children >>= mapM e2fs
-}


forest2trees :: SyntaxForest n -> SList (SyntaxTree n)
forest2trees (FNode n forests) = map (TNode n) $ forests >>= mapM forest2trees
forest2trees (FString s) = [TString s]
forest2trees (FInt    n) = [TInt    n]
forest2trees (FFloat  f) = [TFloat  f]
forest2trees (FMeta)     = [TMeta]

----------------------------------------------------------------------
-- * profiles

-- | Pairing a rule name with a profile
data NameProfile a = Name a [Profile (SyntaxForest a)]
		     deriving (Eq, Ord, Show)

name2fun :: NameProfile a -> a
name2fun (Name fun _) = fun

-- | A profile is a simple representation of a function on a number of arguments.
-- We only use lists of profiles
data Profile a = Unify [Int] -- ^ The Int's are the argument positions.
			     -- 'Unify []' will become a metavariable,
			     -- 'Unify [a,b]' means that the arguments are equal,
	       | Constant a
		 deriving (Eq, Ord, Show)

instance Functor Profile where
    fmap f (Constant a) = Constant (f a)
    fmap f (Unify xs)   = Unify xs

-- | a function name where the profile does not contain arguments
-- (i.e. denoting a constant, not a function)
constantNameToForest :: NameProfile a -> SyntaxForest a
constantNameToForest name@(Name fun profile) = FNode fun [map unConstant profile]
    where unConstant (Constant a) = a
	  unConstant (Unify [])   = FMeta
	  unConstant _ = error $ "constantNameToForest: the profile should not contain arguments"

-- | profile application; we need some way of unifying a list of arguments
applyProfile :: ([b] -> a) -> [Profile a] -> [b] -> [a]
applyProfile unify profile args = map apply profile
    where apply (Unify xs)  = unify $ map (args !!) xs
	  apply (Constant a) = a

-- | monadic profile application
applyProfileM :: Monad m => ([b] -> m a) -> [Profile a] -> [b] -> m [a]
applyProfileM unify profile args = mapM apply profile
    where apply (Unify xs)  = unify $ map (args !!) xs
	  apply (Constant a) = return a

-- | profile composition:
--
-- >   applyProfile u z (ps `composeProfiles` qs) args
-- >      ==
-- >   applyProfile u z ps (applyProfile u z qs args)
--
-- compare with function composition
--
-- >   (p . q) arg
-- >      ==
-- >   p (q arg)
--
-- Note that composing an 'Constant' with two or more arguments returns an error
-- (since 'Unify' can only take arguments) -- this might change in the future, if there is a need.
composeProfiles :: [Profile a] -> [Profile a] -> [Profile a]
composeProfiles ps qs = map compose ps
    where compose (Unify [x]) = qs !! x
	  compose (Unify xs)  = Unify [ y | x <- xs, let Unify ys = qs !! x, y <- ys ]
	  compose constant    = constant


------------------------------------------------------------
-- pretty-printing

instance (Print c, Print t) => Print (Symbol c t) where
    prt = symbol prt (simpleShow . prt)
	where simpleShow str = "\"" ++ concatMap mkEsc str ++ "\""
	      mkEsc '\\' = "\\\\"
	      mkEsc '\"' = "\\\""
	      mkEsc '\n' = "\\n"
	      mkEsc '\t' = "\\t"
	      mkEsc chr  = [chr]
    prtList = prtSep " "

instance Print t => Print (Input t) where
    prt input = "input " ++ prt (inputEdges input)

instance (Print s) => Print (Edge s) where
    prt (Edge i j s) = "[" ++ show i ++ "-" ++ show j ++ ": " ++ prt s ++ "]"
    prtList = prtSep ""

instance (Print s) => Print (SyntaxTree s) where
    prt (TNode s trees)
	| null trees = prt s
	| otherwise  = "(" ++ prt s ++ prtBefore " " trees ++ ")"
    prt (TString  s) = show s
    prt (TInt     n) = show n
    prt (TFloat   f) = show f
    prt (TMeta)      = "?"
    prtList = prtAfter "\n"

instance (Print s) => Print (SyntaxForest s) where
    prt (FNode s []) = "(" ++ prt s ++ " - ERROR: null forests)"
    prt (FNode s [[]]) = prt s
    prt (FNode s [forests]) = "(" ++ prt s ++ prtBefore " " forests ++ ")"
    prt (FNode s children) = "{" ++ prtSep " | " [ prt s ++ prtBefore " " forests |
						   forests <- children ] ++ "}"
    prt (FString s) = show s
    prt (FInt    n) = show n
    prt (FFloat  f) = show f
    prt (FMeta)     = "?"
    prtList = prtAfter "\n"

instance Print a => Print (Profile a) where
    prt (Unify [])   = "?"
    prt (Unify args) = prtSep "=" args
    prt (Constant a) = prt a

instance Print a => Print (NameProfile a) where
    prt (Name fun profile) = prt fun ++ prt profile