1
0
forked from GitHub/gf-core

Added an O(n log n) version of nub

The new function is called nub'. It replaces the old sortNub, which was
not lazy and did not preserve the original order of the elements.
This commit is contained in:
peter.ljunglof
2012-08-29 21:45:10 +00:00
parent 7ad4616d9c
commit 87260365c7
4 changed files with 14 additions and 11 deletions

View File

@@ -22,7 +22,7 @@ import GF.Grammar.Lookup
import GF.Grammar.Predef
import GF.Data.BacktrackM
import GF.Data.Operations
import GF.Data.Utilities (updateNthM, updateNth, sortNub)
import GF.Data.Utilities (updateNthM, updateNth)
import System.IO
import qualified Data.Map as Map

View File

@@ -17,6 +17,7 @@ module GF.Data.Utilities where
import Data.Maybe
import Data.List
import Control.Monad (MonadPlus(..),liftM)
import qualified Data.Set as Set
-- * functions on lists
@@ -67,15 +68,18 @@ safeInit :: [a] -> [a]
-- Total variant of 'init': drops the last element of a list, and hands back
-- the empty list unchanged instead of failing on it.
safeInit list
  | null list = []
  | otherwise = init list
-- | Remove duplicates from a list by sorting it and keeping one element from
-- each run of equal neighbours.
--
-- Unlike 'nub', the result is in ascending order rather than in order of
-- first occurrence, and the whole input has to be sorted before the first
-- element can be produced.
sortNub :: Ord a => [a] -> [a]
-- The pattern @(x : _)@ picks the first element of every group without
-- resorting to the partial function 'head'.
sortNub xs = [x | (x : _) <- group (sort xs)]
-- | Like 'nub', but O(n log n) instead of O(n^2), because the elements seen
-- so far are kept in a 'Set.Set' and looked up there.
--
-- The result is stable (elements are returned in the order of their first
-- occurrence) and produced lazily, so a prefix of the result can be consumed
-- without traversing the whole input.
-- Requires that the list elements can be compared with 'Ord'.
-- Code taken from http://hpaste.org/54411
nub' :: Ord a => [a] -> [a]
nub' = loop Set.empty
  where
    -- @seen@ holds every element that has already been emitted.
    loop _ [] = []
    loop seen (x : xs)
      | Set.member x seen = loop seen xs                  -- duplicate: drop it
      | otherwise = x : loop (Set.insert x seen) xs       -- first occurrence: keep it
-- | Like 'nubBy', but more efficient as it uses sorting internally.
--
-- The input is sorted and grouped by 'sortGroupBy' using the given
-- comparison function, and the first element of each group is kept.
-- The result therefore follows the sort order, not the input order.
sortNubBy :: (a -> a -> Ordering) -> [a] -> [a]
-- The pattern @(x : _)@ takes the first element of each group without the
-- partial function 'head'.
sortNubBy f xs = [x | (x : _) <- sortGroupBy f xs]
-- | Sorts and then groups elements given and ordering of the
-- | Sort a list with the given comparison function, then split the sorted
-- list into groups of adjacent elements related by 'compareEq' applied to
-- that same comparison function.
-- NOTE(review): 'compareEq' is defined outside this view; presumably it
-- holds exactly when the comparison yields 'EQ' -- confirm.
sortGroupBy :: (a -> a -> Ordering) -> [a] -> [[a]]
sortGroupBy cmp xs = groupBy (compareEq cmp) (sortBy cmp xs)

View File

@@ -53,7 +53,7 @@ isEpsilon (REConcat []) = True
isEpsilon _ = False
unionRE :: Ord a => [RE a] -> RE a
unionRE = unionOrId . sortNub . concatMap toList
unionRE = unionOrId . nub' . concatMap toList
  where
    -- Flatten one level of nesting: a union contributes its alternatives,
    -- any other expression contributes itself. Duplicates among the
    -- collected alternatives are then dropped by nub' before the list is
    -- handed to unionOrId.
    toList (REUnion xs) = xs
    toList x = [x]

View File

@@ -20,7 +20,6 @@ import qualified Data.IntMap as IntMap
import qualified GF.Data.TrieMap as TrieMap
import qualified Data.List as List
import Control.Monad.ST
import GF.Data.Utilities(sortNub)
optimizePGF :: PGF -> PGF
optimizePGF pgf = pgf{concretes=fmap (updateConcrete (abstract pgf) .