1
0
forked from GitHub/gf-core
Files
gf-core/src/GF/Formalism/Utilities.hs
2005-04-16 04:40:48 +00:00

278 lines
9.6 KiB
Haskell

----------------------------------------------------------------------
-- |
-- Maintainer : PL
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/04/16 05:40:49 $
-- > CVS $Author: peb $
-- > CVS $Revision: 1.3 $
--
-- Basic type declarations and functions for grammar formalisms
-----------------------------------------------------------------------------
module GF.Formalism.Utilities where
import Monad
import Array
import List (groupBy)
import GF.Data.SortedList
import GF.Data.Assoc
import GF.Data.Utilities (sameLength, foldMerge, splitBy)
import GF.Infra.Print
------------------------------------------------------------
-- * symbols
data Symbol c t = Cat c | Tok t
deriving (Eq, Ord, Show)
symbol :: (c -> a) -> (t -> a) -> Symbol c t -> a
symbol fc ft (Cat cat) = fc cat
symbol fc ft (Tok tok) = ft tok
mapSymbol :: (c -> d) -> (t -> u) -> Symbol c t -> Symbol d u
mapSymbol fc ft = symbol (Cat . fc) (Tok . ft)
filterCats :: [Symbol c t] -> [c]
filterCats syms = [ cat | Cat cat <- syms ]
filterToks :: [Symbol c t] -> [t]
filterToks syms = [ tok | Tok tok <- syms ]
------------------------------------------------------------
-- * edges
data Edge s = Edge Int Int s
deriving (Eq, Ord, Show)
instance Functor Edge where
fmap f (Edge i j s) = Edge i j (f s)
------------------------------------------------------------
-- * representaions of input tokens
data Input t = MkInput { inputEdges :: [Edge t],
inputBounds :: (Int, Int),
inputFrom :: Array Int (Assoc t [Int]),
inputTo :: Array Int (Assoc t [Int]),
inputToken :: Assoc t [(Int, Int)]
}
makeInput :: Ord t => [Edge t] -> Input t
input :: Ord t => [t] -> Input t
inputMany :: Ord t => [[t]] -> Input t
instance Show t => Show (Input t) where
show input = "makeInput " ++ show (inputEdges input)
----------
makeInput inEdges | null inEdges = input []
| otherwise = MkInput inEdges inBounds inFrom inTo inToken
where inBounds = foldr1 minmax [ (i, j) | Edge i j _ <- inEdges ]
where minmax (a, b) (a', b') = (min a a', max b b')
inFrom = fmap (accumAssoc id) $ accumArray (<++>) [] inBounds $
[ (i, [(tok, j)]) | Edge i j tok <- inEdges ]
inTo = fmap (accumAssoc id) $ accumArray (<++>) [] inBounds
[ (j, [(tok, i)]) | Edge i j tok <- inEdges ]
inToken = accumAssoc id [ (tok, (i, j)) | Edge i j tok <- inEdges ]
input toks = MkInput inEdges inBounds inFrom inTo inToken
where inEdges = zipWith3 Edge [0..] [1..] toks
inBounds = (0, length toks)
inFrom = listArray inBounds $
[ listAssoc [(tok, [j])] | (tok, j) <- zip toks [1..] ] ++ [ listAssoc [] ]
inTo = listArray inBounds $
[ listAssoc [] ] ++ [ listAssoc [(tok, [i])] | (tok, i) <- zip toks [0..] ]
inToken = accumAssoc id [ (tok, (i, j)) | Edge i j tok <- inEdges ]
inputMany toks = MkInput inEdges inBounds inFrom inTo inToken
where inEdges = [ Edge i j t | (i, j, ts) <- zip3 [0..] [1..] toks, t <- ts ]
inBounds = (0, length toks)
inFrom = listArray inBounds $
[ listAssoc [ (t, [j]) | t <- nubsort ts ] | (ts, j) <- zip toks [1..] ]
++ [ listAssoc [] ]
inTo = listArray inBounds $
[ listAssoc [] ] ++
[ listAssoc [ (t, [i]) | t <- nubsort ts ] | (ts, i) <- zip toks [0..] ]
inToken = accumAssoc id [ (tok, (i, j)) | Edge i j tok <- inEdges ]
------------------------------------------------------------
-- * charts, forests & trees
-- | The values of the chart, a list of key-daughters pairs,
-- has unique keys. In essence, it is a map from 'n' to daughters.
-- The daughters should be a set (not necessarily sorted) of rhs's.
type SyntaxChart n e = Assoc e [(n, [[e]])]
-- better(?) representation of forests:
-- data Forest n = F (SMap n (SList [Forest n])) Bool
-- ==
-- type Forest n = GeneralTrie n (SList [Forest n]) Bool
-- (the Bool == isMeta)
data SyntaxForest n = FMeta
| FNode n [[SyntaxForest n]]
-- ^ The outer list should be a set (not necessarily sorted)
-- of possible alternatives. Ie. the outer list
-- is a disjunctive node, and the inner lists
-- are (conjunctive) concatenative nodes
deriving (Eq, Ord, Show)
data SyntaxTree n = TMeta | TNode n [SyntaxTree n]
deriving (Eq, Ord, Show)
forestName :: SyntaxForest n -> Maybe n
forestName (FNode n _) = Just n
forestName (FMeta) = Nothing
treeName :: SyntaxTree n -> Maybe n
treeName (TNode n _) = Just n
treeName (TMeta) = Nothing
instance Functor SyntaxTree where
fmap f (TNode n trees) = TNode (f n) $ map (fmap f) trees
fmap f (TMeta) = TMeta
instance Functor SyntaxForest where
fmap f (FNode n forests) = FNode (f n) $ map (map (fmap f)) forests
fmap f (FMeta) = FMeta
{- måste tänka mer på detta:
compactForests :: Ord n => [SyntaxForest n] -> SList (SyntaxForest n)
compactForests = map joinForests . groupBy eqNames . sortForests
where eqNames f g = forestName f == forestName g
sortForests = foldMerge mergeForests [] . map return
mergeForests [] gs = gs
mergeForests fs [] = fs
mergeForests fs@(f:fs') gs@(g:gs')
= case forestName f `compare` forestName g of
LT -> f : mergeForests fs' gs
GT -> g : mergeForests fs gs'
EQ -> f : g : mergeForests fs' gs'
joinForests fs = case forestName (head fs) of
Nothing -> FMeta
Just name -> FNode name $
compactDaughters $
concat [ fss | FNode _ fss <- fs ]
compactDaughters fss = case head fss of
[] -> [[]]
[_] -> map return $ compactForests $ concat fss
_ -> nubsort fss
-}
-- ** conversions between representations
forest2trees :: SyntaxForest n -> SList (SyntaxTree n)
forest2trees (FNode n forests) = map (TNode n) $ forests >>= mapM forest2trees
forest2trees (FMeta) = [TMeta]
chart2forests :: (Ord n, Ord e) =>
SyntaxChart n e -- ^ The complete chart
-> (e -> Bool) -- ^ When is an edge 'FMeta'?
-> [e] -- ^ The starting edges
-> SList (SyntaxForest n) -- ^ The result has unique keys, ie. all 'n' are joined together.
-- In essence, the result is a map from 'n' to forest daughters
-- simplest implementation
chart2forests chart isMeta = concatMap edge2forests
where edge2forests edge = if isMeta edge then [FMeta]
else map item2forest $ chart ? edge
item2forest (name, children) = FNode name $ children >>= mapM edge2forests
{-
-- more intelligent(?) implementation,
-- requiring that charts and forests are sorted maps and sorted sets
chart2forests chart isMeta = es2fs
where e2fs e = if isMeta e then [FMeta] else map i2f $ chart ? e
es2fs es = if null metas then fs else FMeta : fs
where (metas, nonMetas) = splitBy isMeta es
fs = map i2f $ unionMap (<++>) $ map (chart ?) nonMetas
i2f (name, children) = FNode name $
case head children of
[] -> [[]]
[_] -> map return $ es2fs $ concat children
_ -> children >>= mapM e2fs
-}
-- ** operations on forests
unifyManyForests :: (Monad m, Eq n) => [SyntaxForest n] -> m (SyntaxForest n)
unifyManyForests = foldM unifyForests FMeta
-- | two forests can be unified, if either is 'FMeta', or both have the same parent,
-- and all children can be unified
unifyForests :: (Monad m, Eq n) => SyntaxForest n -> SyntaxForest n -> m (SyntaxForest n)
unifyForests FMeta forest = return forest
unifyForests forest FMeta = return forest
unifyForests (FNode name1 children1) (FNode name2 children2)
| name1 == name2 && not (null children) = return $ FNode name1 children
| otherwise = fail "forest unification failure"
where children = [ forests | forests1 <- children1, forests2 <- children2,
sameLength forests1 forests2,
forests <- zipWithM unifyForests forests1 forests2 ]
-- ** operations on trees
unifyManyTrees :: (Monad m, Eq n) => [SyntaxTree n] -> m (SyntaxTree n)
unifyManyTrees = foldM unifyTrees TMeta
-- | two trees can be unified, if either is 'TMeta',
-- or both have the same parent, and their children can be unified
unifyTrees :: (Monad m, Eq n) => SyntaxTree n -> SyntaxTree n -> m (SyntaxTree n)
unifyTrees TMeta tree = return tree
unifyTrees tree TMeta = return tree
unifyTrees (TNode name1 children1) (TNode name2 children2)
| name1 == name2 && sameLength children1 children2
= liftM (TNode name1) $ zipWithM unifyTrees children1 children2
| otherwise = fail "tree unification failure"
------------------------------------------------------------
-- pretty-printing
instance (Print c, Print t) => Print (Symbol c t) where
prt = symbol prt (simpleShow . prt)
where simpleShow str = "\"" ++ concatMap mkEsc str ++ "\""
mkEsc '\\' = "\\\\"
mkEsc '\"' = "\\\""
mkEsc '\n' = "\\n"
mkEsc '\t' = "\\t"
mkEsc chr = [chr]
prtList = prtSep " "
instance Print t => Print (Input t) where
prt input = "input " ++ prt (inputEdges input)
instance (Print s) => Print (Edge s) where
prt (Edge i j s) = "[" ++ show i ++ "-" ++ show j ++ ": " ++ prt s ++ "]"
prtList = prtSep ""
instance (Print s) => Print (SyntaxTree s) where
prt (TNode s trees)
| null trees = prt s
| otherwise = "(" ++ prt s ++ prtBefore " " trees ++ ")"
prt (TMeta) = "?"
prtList = prtAfter "\n"
instance (Print s) => Print (SyntaxForest s) where
prt (FNode s []) = "(" ++ prt s ++ " - ERROR: null forests)"
prt (FNode s [[]]) = prt s
prt (FNode s [forests]) = "(" ++ prt s ++ prtBefore " " forests ++ ")"
prt (FNode s children) = "{" ++ prtSep " | " [ prt s ++ prtBefore " " forests |
forests <- children ] ++ "}"
prt (FMeta) = "?"
prtList = prtAfter "\n"