Added an O(n log n) version of nub

The new nub is called nub', and it replaces the old sortNub which was 
not lazy and did not retain the order between the elements.
This commit is contained in:
peter.ljunglof
2012-08-29 21:45:10 +00:00
parent e2ecdfed1f
commit a7de16c34b
4 changed files with 14 additions and 11 deletions

View File

@@ -22,7 +22,7 @@ import GF.Grammar.Lookup
import GF.Grammar.Predef import GF.Grammar.Predef
import GF.Data.BacktrackM import GF.Data.BacktrackM
import GF.Data.Operations import GF.Data.Operations
import GF.Data.Utilities (updateNthM, updateNth, sortNub) import GF.Data.Utilities (updateNthM, updateNth)
import System.IO import System.IO
import qualified Data.Map as Map import qualified Data.Map as Map

View File

@@ -17,6 +17,7 @@ module GF.Data.Utilities where
import Data.Maybe import Data.Maybe
import Data.List import Data.List
import Control.Monad (MonadPlus(..),liftM) import Control.Monad (MonadPlus(..),liftM)
import qualified Data.Set as Set
-- * functions on lists -- * functions on lists
@@ -67,15 +68,18 @@ safeInit :: [a] -> [a]
safeInit [] = [] safeInit [] = []
safeInit xs = init xs safeInit xs = init xs
-- | Like 'nub', but more efficient as it uses sorting internally. -- | Like 'nub', but O(n log n) instead of O(n^2), since it uses a set to look up previously seen elements.
sortNub :: Ord a => [a] -> [a] -- The result list is stable (the elements are returned in the order they occur), and lazy.
sortNub = map head . group . sort -- Requires that the list elements can be compared by Ord.
-- Code ruthlessly taken from http://hpaste.org/54411
nub' :: Ord a => [a] -> [a]
nub' items = foldr step (const []) items Set.empty
  where
    -- 'step' receives the current element, the (lazy) continuation for the
    -- rest of the list, and the set of elements already emitted.  Because
    -- the continuation is only demanded when the consumer asks for more
    -- output, the result is produced incrementally and in input order.
    step item rest visited
      | item `Set.member` visited = rest visited
      | otherwise = item : rest (Set.insert item visited)
-- | Like 'nubBy', but more efficient as it uses sorting internally. -- | Sorts and then groups elements given an ordering of the
sortNubBy :: (a -> a -> Ordering) -> [a] -> [a]
-- Keep the first element of every group returned by 'sortGroupBy'.
-- 'head' cannot fail here: 'sortGroupBy' is built on 'groupBy', which never
-- produces an empty group.
sortNubBy f xs = [head grp | grp <- sortGroupBy f xs]
-- | Sorts and then groups elements given and ordering of the
-- elements. -- elements.
sortGroupBy :: (a -> a -> Ordering) -> [a] -> [[a]] sortGroupBy :: (a -> a -> Ordering) -> [a] -> [[a]]
sortGroupBy f = groupBy (compareEq f) . sortBy f sortGroupBy f = groupBy (compareEq f) . sortBy f

View File

@@ -53,7 +53,7 @@ isEpsilon (REConcat []) = True
isEpsilon _ = False isEpsilon _ = False
unionRE :: Ord a => [RE a] -> RE a unionRE :: Ord a => [RE a] -> RE a
unionRE = unionOrId . sortNub . concatMap toList unionRE = unionOrId . nub' . concatMap toList
where where
toList (REUnion xs) = xs toList (REUnion xs) = xs
toList x = [x] toList x = [x]

View File

@@ -20,7 +20,6 @@ import qualified Data.IntMap as IntMap
import qualified GF.Data.TrieMap as TrieMap import qualified GF.Data.TrieMap as TrieMap
import qualified Data.List as List import qualified Data.List as List
import Control.Monad.ST import Control.Monad.ST
import GF.Data.Utilities(sortNub)
optimizePGF :: PGF -> PGF optimizePGF :: PGF -> PGF
optimizePGF pgf = pgf{concretes=fmap (updateConcrete (abstract pgf) . optimizePGF pgf = pgf{concretes=fmap (updateConcrete (abstract pgf) .