1
0
forked from GitHub/gf-core

More efficient implementation of topological sort.

Profiling the compilation of the OALD lexicon showed that 90-95% of the time was spent in topoSort. The old implementation was quadratic. Replaced this with O(E + V) implementation, in GF.Data.Relation. This gave a 10x speed-up (~ 25 sec instead of ~270 sec) for compiling ParseEng and OaldEng.
This commit is contained in:
bjorn
2008-11-27 10:29:29 +00:00
parent 210089b6c8
commit f6cf10e655
3 changed files with 86 additions and 48 deletions

View File

@@ -56,7 +56,7 @@ module GF.Data.Operations (-- * misc functions
sortByLongest, combinations, mkTextFile, initFilePath,
-- * topological sorting with test of cyclicity
topoTest, topoSort, cyclesIn,
topoTest,
-- * the generic fix point iterator
iterFix,
@@ -82,6 +82,7 @@ import Data.Map (Map)
import Control.Monad (liftM,liftM2, MonadPlus, mzero, mplus)
import GF.Data.ErrM
import GF.Data.Relation
infixr 5 +++
infixr 5 ++-
@@ -477,36 +478,8 @@ initFilePath :: FilePath -> FilePath
initFilePath f = reverse (dropWhile (/='/') (reverse f))
-- | topological sorting with test of cyclicity
topoTest :: Eq a => [(a,[a])] -> Either [a] [[a]]
topoTest g = if length g' == length g then Left g' else Right (cyclesIn g ++[[]])
where
g' = topoSort g
cyclesIn :: Eq a => [(a,[a])] -> [[a]]
cyclesIn deps = nubb $ clean $ filt $ iterFix findDep immediate where
immediate = [[y,x] | (x,xs) <- deps, y <- xs]
findDep chains = [y:x:chain |
x:chain <- chains, (x',xs) <- deps, x' == x, y <- xs,
notElem y (init chain)]
clean = map remdup
nubb = nubBy (\x y -> y == reverse x)
filt = filter (\xs -> last xs == head xs)
remdup (x:xs) = x : remdup xs' where xs' = dropWhile (==x) xs
remdup [] = []
-- | topological sorting
topoSort :: Eq a => [(a,[a])] -> [a]
topoSort g = reverse $ tsort 0 [ffs | ffs@(f,_) <- g, inDeg f == 0] [] where
tsort _ [] r = r
tsort k (ffs@(f,fs) : cs) r
| elem f r = tsort k cs r
| k > lx = r
| otherwise = tsort (k+1) cs (f : tsort (k+1) (info fs) r)
info hs = [(f,fs) | (f,fs) <- g, elem f hs]
inDeg f = length [t | (h,hs) <- g, t <- hs, t == f]
lx = length g
topoTest :: Ord a => [(a,[a])] -> Either [a] [[a]]
topoTest = topologicalSort . mkRel'
-- | the generic fix point iterator
iterFix :: Eq a => ([a] -> [a]) -> [a] -> [a]

View File

@@ -22,12 +22,16 @@ module GF.Data.Relation (Rel, mkRel, mkRel'
, equivalenceClasses
, isTransitive, isReflexive, isSymmetric
, isEquivalence
, isSubRelationOf) where
, isSubRelationOf
, topologicalSort) where
import Data.Foldable (toList)
import Data.List
import Data.Maybe
import Data.Map (Map)
import qualified Data.Map as Map
import Data.Sequence (Seq)
import qualified Data.Sequence as Seq
import Data.Set (Set)
import qualified Data.Set as Set
@@ -44,7 +48,7 @@ mkRel ps = relates ps Map.empty
mkRel' :: Ord a => [(a,[a])] -> Rel a
mkRel' xs = Map.fromListWith Set.union [(x,Set.fromList ys) | (x,ys) <- xs]
relToList :: Rel a -> [(a,a)]
relToList :: Ord a => Rel a -> [(a,a)]
relToList r = [ (x,y) | (x,ys) <- Map.toList r, y <- Set.toList ys ]
-- | Add a pair to the relation.
@@ -67,6 +71,9 @@ allRelated r x = fromMaybe Set.empty (Map.lookup x r)
domain :: Ord a => Rel a -> Set a
domain r = foldl Set.union (Map.keysSet r) (Map.elems r)
reverseRel :: Ord a => Rel a -> Rel a
reverseRel r = mkRel [(y,x) | (x,y) <- relToList r]
-- | Keep only pairs for which both elements are in the given set.
intersectSetRel :: Ord a => Set a -> Rel a -> Rel a
intersectSetRel s = filterRel (\x y -> x `Set.member` s && y `Set.member` s)
@@ -98,12 +105,12 @@ reflexiveElements r = Set.fromList [ x | (x,ys) <- Map.toList r, x `Set.member`
-- | Keep the related pairs for which the predicate is true.
filterRel :: Ord a => (a -> a -> Bool) -> Rel a -> Rel a
filterRel p = purgeEmpty . Map.mapWithKey (Set.filter . p)
filterRel p = fst . purgeEmpty . Map.mapWithKey (Set.filter . p)
-- | Remove keys that map to no elements.
purgeEmpty :: Ord a => Rel a -> Rel a
purgeEmpty r = Map.filter (not . Set.null) r
purgeEmpty :: Ord a => Rel a -> (Rel a, Set a)
purgeEmpty r = let (r',r'') = Map.partition (not . Set.null) r
in (r', Map.keysSet r'')
-- | Get the equivalence classes from an equivalence relation.
equivalenceClasses :: Ord a => Rel a -> [Set a]
@@ -128,3 +135,59 @@ isEquivalence r = isReflexive r && isSymmetric r && isTransitive r
isSubRelationOf :: Ord a => Rel a -> Rel a -> Bool
isSubRelationOf r1 r2 = all (uncurry (isRelatedTo r2)) (relToList r1)
-- | Returns 'Left' if there are cycles, and 'Right' if there are cycles.
topologicalSort :: Ord a => Rel a -> Either [a] [[a]]
topologicalSort r = tsort r' noIncoming Seq.empty
where r' = relToRel' r
noIncoming = Seq.fromList [x | (x,(is,_)) <- Map.toList r', Set.null is]
tsort :: Ord a => Rel' a -> Seq a -> Seq a -> Either [a] [[a]]
tsort r xs l = case Seq.viewl xs of
Seq.EmptyL | isEmpty' r -> Left (toList l)
| otherwise -> Right (findCycles (rel'ToRel r))
x Seq.:< xs -> tsort r' (xs Seq.>< Seq.fromList new) (l Seq.|> x)
where (r',_,os) = remove x r
new = [o | o <- Set.toList os, Set.null (incoming o r')]
findCycles :: Ord a => Rel a -> [[a]]
findCycles = map Set.toList . equivalenceClasses . reflexiveSubrelation . symmetricSubrelation . transitiveClosure
--
-- * Alternative representation that keeps both incoming and outgoing edges
--
-- | Keeps both incoming and outgoing edges.
type Rel' a = Map a (Set a, Set a)
isEmpty' :: Ord a => Rel' a -> Bool
isEmpty' = Map.null
relToRel' :: Ord a => Rel a -> Rel' a
relToRel' r = Map.unionWith (\ (i,_) (_,o) -> (i,o)) ir or
where ir = Map.map (\s -> (s,Set.empty)) $ reverseRel r
or = Map.map (\s -> (Set.empty,s)) $ r
rel'ToRel :: Ord a => Rel' a -> Rel a
rel'ToRel = Map.map snd
-- | Removes an element from a relation.
-- Returns the new relation, and the set of incoming and outgoing edges
-- of the removed element.
remove :: Ord a => a -> Rel' a -> (Rel' a, Set a, Set a)
remove x r = let (mss,r') = Map.updateLookupWithKey (\_ _ -> Nothing) x r
in case mss of
-- element was not in the relation
Nothing -> (r', Set.empty, Set.empty)
-- remove element from all incoming and outgoing sets
-- of other elements
Just (is,os) ->
let r'' = foldr (\i -> Map.adjust (\ (is',os') -> (is', Set.delete x os')) i) r' $ Set.toList is
r''' = foldr (\o -> Map.adjust (\ (is',os') -> (Set.delete x is', os')) o) r'' $ Set.toList os
in (r''', is, os)
incoming :: Ord a => a -> Rel' a -> Set a
incoming x r = maybe Set.empty fst $ Map.lookup x r
outgoing :: Ord a => a -> Rel' a -> Set a
outgoing x r = maybe Set.empty snd $ Map.lookup x r

View File

@@ -87,13 +87,13 @@ data ModuleType i =
| MTInstance i
| MTReuse (MReuseType i)
| MTUnion (ModuleType i) [(i,[i])] -- ^ not meant to be recursive
deriving (Eq,Show)
deriving (Eq,Ord,Show)
data MReuseType i = MRInterface i | MRInstance i i | MRResource i
deriving (Show,Eq)
deriving (Eq,Ord,Show)
data MInclude i = MIAll | MIOnly [i] | MIExcept [i]
deriving (Show,Eq)
deriving (Eq,Ord,Show)
extends :: Module i a -> [i]
extends = map fst . extend
@@ -165,13 +165,13 @@ data MainConcreteSpec i = MainConcreteSpec {
data OpenSpec i =
OSimple OpenQualif i
| OQualif OpenQualif i i
deriving (Eq,Show)
deriving (Eq,Ord,Show)
data OpenQualif =
OQNormal
| OQInterface
| OQIncomplete
deriving (Eq,Show)
deriving (Eq,Ord,Show)
oSimple :: i -> OpenSpec i
oSimple = OSimple OQNormal
@@ -182,7 +182,7 @@ oQualif = OQualif OQNormal
data ModuleStatus =
MSComplete
| MSIncomplete
deriving (Eq,Show)
deriving (Eq,Ord,Show)
openedModule :: OpenSpec i -> i
openedModule o = case o of
@@ -280,7 +280,7 @@ data IdentM i = IdentM {
identM :: i ,
typeM :: ModuleType i
}
deriving (Eq,Show)
deriving (Eq,Ord,Show)
typeOfModule :: ModInfo i a -> ModuleType i
typeOfModule mi = case mi of
@@ -402,12 +402,14 @@ isCompleteModule m = mstatus m == MSComplete && mtype m /= MTInterface
-- | all abstract modules sorted from least to most dependent
allAbstracts :: Eq i => MGrammar i a -> [i]
allAbstracts gr = topoSort
[(i,extends m) | (i,ModMod m) <- modules gr, mtype m == MTAbstract]
allAbstracts :: (Ord i, Show i) => MGrammar i a -> [i]
allAbstracts gr =
case topoTest [(i,extends m) | (i,ModMod m) <- modules gr, mtype m == MTAbstract] of
Left is -> is
Right cycles -> error $ "Cyclic abstract modules: " ++ show cycles
-- | the last abstract in dependency order (head of list)
greatestAbstract :: Eq i => MGrammar i a -> Maybe i
greatestAbstract :: (Ord i, Show i) => MGrammar i a -> Maybe i
greatestAbstract gr = case allAbstracts gr of
[] -> Nothing
as -> return $ last as