diff --git a/src/compiler/GF/Compile/GeneratePMCFG.hs b/src/compiler/GF/Compile/GeneratePMCFG.hs index 0dcb8c8cd..bb4c5b549 100644 --- a/src/compiler/GF/Compile/GeneratePMCFG.hs +++ b/src/compiler/GF/Compile/GeneratePMCFG.hs @@ -22,7 +22,7 @@ import GF.Grammar.Lookup import GF.Grammar.Predef import GF.Data.BacktrackM import GF.Data.Operations -import GF.Data.Utilities (updateNthM, updateNth, sortNub) +import GF.Data.Utilities (updateNthM, updateNth) import System.IO import qualified Data.Map as Map diff --git a/src/compiler/GF/Data/Utilities.hs b/src/compiler/GF/Data/Utilities.hs index 74d3ef81e..50269bef1 100644 --- a/src/compiler/GF/Data/Utilities.hs +++ b/src/compiler/GF/Data/Utilities.hs @@ -17,6 +17,7 @@ module GF.Data.Utilities where import Data.Maybe import Data.List import Control.Monad (MonadPlus(..),liftM) +import qualified Data.Set as Set -- * functions on lists @@ -67,15 +68,18 @@ safeInit :: [a] -> [a] safeInit [] = [] safeInit xs = init xs --- | Like 'nub', but more efficient as it uses sorting internally. -sortNub :: Ord a => [a] -> [a] -sortNub = map head . group . sort +-- | Like 'nub', but O(n log n) instead of O(n^2), since it uses a set to lookup previous things. +-- The result list is stable (the elements are returned in the order they occur), and lazy. +-- Requires that the list elements can be compared by Ord. +-- Code ruthlessly taken from http://hpaste.org/54411 +nub' :: Ord a => [a] -> [a] +nub' = loop Set.empty + where loop _ [] = [] + loop seen (x : xs) + | Set.member x seen = loop seen xs + | otherwise = x : loop (Set.insert x seen) xs --- | Like 'nubBy', but more efficient as it uses sorting internally. -sortNubBy :: (a -> a -> Ordering) -> [a] -> [a] -sortNubBy f = map head . sortGroupBy f - --- | Sorts and then groups elements given and ordering of the +-- | Sorts and then groups elements given an ordering of the -- elements. sortGroupBy :: (a -> a -> Ordering) -> [a] -> [[a]] sortGroupBy f = groupBy (compareEq f) . sortBy f diff --git a/src/compiler/GF/Speech/RegExp.hs b/src/compiler/GF/Speech/RegExp.hs index 2592b3d57..bf9f89cb3 100644 --- a/src/compiler/GF/Speech/RegExp.hs +++ b/src/compiler/GF/Speech/RegExp.hs @@ -53,7 +53,7 @@ isEpsilon (REConcat []) = True isEpsilon _ = False unionRE :: Ord a => [RE a] -> RE a -unionRE = unionOrId . sortNub . concatMap toList +unionRE = unionOrId . nub' . concatMap toList where toList (REUnion xs) = xs toList x = [x] diff --git a/src/runtime/haskell/PGF/Optimize.hs b/src/runtime/haskell/PGF/Optimize.hs index f7fb79779..833684fe0 100644 --- a/src/runtime/haskell/PGF/Optimize.hs +++ b/src/runtime/haskell/PGF/Optimize.hs @@ -20,7 +20,6 @@ import qualified Data.IntMap as IntMap import qualified GF.Data.TrieMap as TrieMap import qualified Data.List as List import Control.Monad.ST -import GF.Data.Utilities(sortNub) optimizePGF :: PGF -> PGF optimizePGF pgf = pgf{concretes=fmap (updateConcrete (abstract pgf) .