From a7de16c34b7ccffc5ae0ac4fd004dfc155b4f546 Mon Sep 17 00:00:00 2001 From: "peter.ljunglof" Date: Wed, 29 Aug 2012 21:45:10 +0000 Subject: [PATCH] Added an O(n log n) version of nub The new nub is called nub', and it replaces the old sortNub which was not lazy and did not retain the order between the elements. --- src/compiler/GF/Compile/GeneratePMCFG.hs | 2 +- src/compiler/GF/Data/Utilities.hs | 20 ++++++++++++-------- src/compiler/GF/Speech/RegExp.hs | 2 +- src/runtime/haskell/PGF/Optimize.hs | 1 - 4 files changed, 14 insertions(+), 11 deletions(-) diff --git a/src/compiler/GF/Compile/GeneratePMCFG.hs b/src/compiler/GF/Compile/GeneratePMCFG.hs index 0dcb8c8cd..bb4c5b549 100644 --- a/src/compiler/GF/Compile/GeneratePMCFG.hs +++ b/src/compiler/GF/Compile/GeneratePMCFG.hs @@ -22,7 +22,7 @@ import GF.Grammar.Lookup import GF.Grammar.Predef import GF.Data.BacktrackM import GF.Data.Operations -import GF.Data.Utilities (updateNthM, updateNth, sortNub) +import GF.Data.Utilities (updateNthM, updateNth) import System.IO import qualified Data.Map as Map diff --git a/src/compiler/GF/Data/Utilities.hs b/src/compiler/GF/Data/Utilities.hs index 74d3ef81e..50269bef1 100644 --- a/src/compiler/GF/Data/Utilities.hs +++ b/src/compiler/GF/Data/Utilities.hs @@ -17,6 +17,7 @@ module GF.Data.Utilities where import Data.Maybe import Data.List import Control.Monad (MonadPlus(..),liftM) +import qualified Data.Set as Set -- * functions on lists @@ -67,15 +68,18 @@ safeInit :: [a] -> [a] safeInit [] = [] safeInit xs = init xs --- | Like 'nub', but more efficient as it uses sorting internally. -sortNub :: Ord a => [a] -> [a] -sortNub = map head . group . sort +-- | Like 'nub', but O(n log n) instead of O(n^2), since it uses a set to lookup previous things. +-- The result list is stable (the elements are returned in the order they occur), and lazy. +-- Requires that the list elements can be compared by Ord. +-- Code ruthlessly taken from http://hpaste.org/54411 +nub' :: Ord a => [a] -> [a] +nub' = loop Set.empty + where loop _ [] = [] + loop seen (x : xs) + | Set.member x seen = loop seen xs + | otherwise = x : loop (Set.insert x seen) xs --- | Like 'nubBy', but more efficient as it uses sorting internally. -sortNubBy :: (a -> a -> Ordering) -> [a] -> [a] -sortNubBy f = map head . sortGroupBy f - --- | Sorts and then groups elements given and ordering of the +-- | Sorts and then groups elements given an ordering of the -- elements. sortGroupBy :: (a -> a -> Ordering) -> [a] -> [[a]] sortGroupBy f = groupBy (compareEq f) . sortBy f diff --git a/src/compiler/GF/Speech/RegExp.hs b/src/compiler/GF/Speech/RegExp.hs index 2592b3d57..bf9f89cb3 100644 --- a/src/compiler/GF/Speech/RegExp.hs +++ b/src/compiler/GF/Speech/RegExp.hs @@ -53,7 +53,7 @@ isEpsilon (REConcat []) = True isEpsilon _ = False unionRE :: Ord a => [RE a] -> RE a -unionRE = unionOrId . sortNub . concatMap toList +unionRE = unionOrId . nub' . concatMap toList where toList (REUnion xs) = xs toList x = [x] diff --git a/src/runtime/haskell/PGF/Optimize.hs b/src/runtime/haskell/PGF/Optimize.hs index f7fb79779..833684fe0 100644 --- a/src/runtime/haskell/PGF/Optimize.hs +++ b/src/runtime/haskell/PGF/Optimize.hs @@ -20,7 +20,6 @@ import qualified Data.IntMap as IntMap import qualified GF.Data.TrieMap as TrieMap import qualified Data.List as List import Control.Monad.ST -import GF.Data.Utilities(sortNub) optimizePGF :: PGF -> PGF optimizePGF pgf = pgf{concretes=fmap (updateConcrete (abstract pgf) .