From 733fdac755aac4752b60e42402f6a2ba67a58d57 Mon Sep 17 00:00:00 2001 From: krangelov Date: Sun, 15 Mar 2020 19:57:47 +0100 Subject: [PATCH] restore the sequence ordering after -optimize-pgf --- src/compiler/GF/Compile/GrammarToPGF.hs | 133 +----------------------- src/runtime/haskell/PGF/Macros.hs | 127 ++++++++++++++++++++++ src/runtime/haskell/PGF/Optimize.hs | 21 +++- 3 files changed, 146 insertions(+), 135 deletions(-) diff --git a/src/compiler/GF/Compile/GrammarToPGF.hs b/src/compiler/GF/Compile/GrammarToPGF.hs index b83154e19..8c4d4558c 100644 --- a/src/compiler/GF/Compile/GrammarToPGF.hs +++ b/src/compiler/GF/Compile/GrammarToPGF.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE BangPatterns, FlexibleContexts, MagicHash #-} +{-# LANGUAGE BangPatterns, FlexibleContexts #-} module GF.Compile.GrammarToPGF (mkCanon2pgf) where --import GF.Compile.Export @@ -8,16 +8,13 @@ import GF.Compile.GenerateBC import PGF(CId,mkCId,utf8CId) import PGF.Internal(fidInt,fidFloat,fidString,fidVar) import PGF.Internal(updateProductionIndices) ---import qualified PGF.Macros as CM import qualified PGF.Internal as C import qualified PGF.Internal as D import GF.Grammar.Predef ---import GF.Grammar.Printer import GF.Grammar.Grammar import qualified GF.Grammar.Lookup as Look import qualified GF.Grammar as A import qualified GF.Grammar.Macros as GM ---import GF.Compile.GeneratePMCFG import GF.Infra.Ident import GF.Infra.Option @@ -30,9 +27,6 @@ import qualified Data.Map as Map import qualified Data.IntMap as IntMap import Data.Array.IArray -import Data.Char -import GHC.Prim -import GHC.Base(getTag) mkCanon2pgf :: Options -> SourceGrammar -> ModuleName -> IOE D.PGF mkCanon2pgf opts gr am = do @@ -65,7 +59,7 @@ mkCanon2pgf opts gr am = do mkConcr cm = do let cflags = err (const noOptions) mflags (lookupModule gr cm) ciCmp | flag optCaseSensitive cflags = compare - | otherwise = compareCaseInsensitve + | otherwise = C.compareCaseInsensitve (ex_seqs,cdefs) <- addMissingPMCFGs Map.empty @@ -74,7 +68,7 @@ mkCanon2pgf opts gr am = do let flags = Map.fromList [(mkCId f,x) | (f,x) <- optionsPGF cflags] - seqs = (mkArray . sortNubBy ciCmp . concat) $ + seqs = (mkArray . C.sortNubBy ciCmp . concat) $ (Map.keys ex_seqs : [maybe [] elems (mseqs mi) | (m,mi) <- allExtends gr cm]) ex_seqs_arr = mkMapArray ex_seqs :: Array SeqId Sequence @@ -312,124 +306,3 @@ genPrintNames cdefs = mkArray lst = listArray (0,length lst-1) lst mkMapArray map = array (0,Map.size map-1) [(v,k) | (k,v) <- Map.toList map] - --- The following is a version of Data.List.sortBy which together --- with the sorting also eliminates duplicate values -sortNubBy cmp = mergeAll . sequences - where - sequences (a:b:xs) = - case cmp a b of - GT -> descending b [a] xs - EQ -> sequences (b:xs) - LT -> ascending b (a:) xs - sequences xs = [xs] - - descending a as [] = [a:as] - descending a as (b:bs) = - case cmp a b of - GT -> descending b (a:as) bs - EQ -> descending a as bs - LT -> (a:as) : sequences (b:bs) - - ascending a as [] = let !x = as [a] - in [x] - ascending a as (b:bs) = - case cmp a b of - GT -> let !x = as [a] - in x : sequences (b:bs) - EQ -> ascending a as bs - LT -> ascending b (\ys -> as (a:ys)) bs - - mergeAll [x] = x - mergeAll xs = mergeAll (mergePairs xs) - - mergePairs (a:b:xs) = let !x = merge a b - in x : mergePairs xs - mergePairs xs = xs - - merge as@(a:as') bs@(b:bs') = - case cmp a b of - GT -> b:merge as bs' - EQ -> a:merge as' bs' - LT -> a:merge as' bs - merge [] bs = bs - merge as [] = as - --- The following function does case-insensitive comparison of sequences. --- This is used to allow case-insensitive parsing, while --- the linearizer still has access to the original cases. -compareCaseInsensitve s1 s2 = - case compareSeq (elems s1) (elems s2) of - (EQ,c) -> c - (c, _) -> c - where - compareSeq [] [] = dup EQ - compareSeq [] _ = dup LT - compareSeq _ [] = dup GT - compareSeq (x:xs) (y:ys) = - case compareSym x y of - (EQ,EQ) -> compareSeq xs ys - (EQ,c2) -> case compareSeq xs ys of - (c1,_) -> (c1,c2) - x -> x - - compareSym s1 s2 = - case s1 of - D.SymCat d1 r1 - -> case s2 of - D.SymCat d2 r2 - -> case compare d1 d2 of - EQ -> dup (r1 `compare` r2) - x -> dup x - _ -> dup LT - D.SymLit d1 r1 - -> case s2 of - D.SymCat {} -> dup GT - D.SymLit d2 r2 - -> case compare d1 d2 of - EQ -> dup (r1 `compare` r2) - x -> dup x - _ -> dup LT - D.SymVar d1 r1 - -> if tagToEnum# (getTag s2 ># 2#) - then dup LT - else case s2 of - D.SymVar d2 r2 - -> case compare d1 d2 of - EQ -> dup (r1 `compare` r2) - x -> dup x - _ -> dup GT - D.SymKS t1 - -> if tagToEnum# (getTag s2 ># 3#) - then dup LT - else case s2 of - D.SymKS t2 -> t1 `compareToken` t2 - _ -> dup GT - D.SymKP a1 b1 - -> if tagToEnum# (getTag s2 ># 4#) - then dup LT - else case s2 of - D.SymKP a2 b2 - -> case compare a1 a2 of - EQ -> dup (b1 `compare` b2) - x -> dup x - _ -> dup GT - _ -> let t1 = getTag s1 - t2 = getTag s2 - in if tagToEnum# (t1 <# t2) - then dup LT - else if tagToEnum# (t1 ==# t2) - then dup EQ - else dup GT - - compareToken [] [] = dup EQ - compareToken [] _ = dup LT - compareToken _ [] = dup GT - compareToken (x:xs) (y:ys) - | x == y = compareToken xs ys - | otherwise = case compare (toLower x) (toLower y) of - EQ -> case compareToken xs ys of - (c,_) -> (c,compare x y) - c -> dup c - - dup x = (x,x) diff --git a/src/runtime/haskell/PGF/Macros.hs b/src/runtime/haskell/PGF/Macros.hs index b60c8a0d4..08052ce2f 100644 --- a/src/runtime/haskell/PGF/Macros.hs +++ b/src/runtime/haskell/PGF/Macros.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE MagicHash, BangPatterns, FlexibleContexts #-} module PGF.Macros where import Prelude hiding ((<>)) -- GHC 8.4.1 clash with Text.PrettyPrint @@ -13,6 +14,9 @@ import qualified Data.Array as Array import Data.List import Data.Array.IArray import Text.PrettyPrint +import GHC.Prim +import GHC.Base(getTag) +import Data.Char -- operations for manipulating PGF grammars and objects @@ -241,3 +245,126 @@ computeSeq filter seq args = concatMap compute seq flattenBracketedString :: BracketedString -> [String] flattenBracketedString (Leaf w) = [w] flattenBracketedString (Bracket _ _ _ _ _ _ bss) = concatMap flattenBracketedString bss + + +-- The following is a version of Data.List.sortBy which together +-- with the sorting also eliminates duplicate values +sortNubBy cmp = mergeAll . sequences + where + sequences (a:b:xs) = + case cmp a b of + GT -> descending b [a] xs + EQ -> sequences (b:xs) + LT -> ascending b (a:) xs + sequences xs = [xs] + + descending a as [] = [a:as] + descending a as (b:bs) = + case cmp a b of + GT -> descending b (a:as) bs + EQ -> descending a as bs + LT -> (a:as) : sequences (b:bs) + + ascending a as [] = let !x = as [a] + in [x] + ascending a as (b:bs) = + case cmp a b of + GT -> let !x = as [a] + in x : sequences (b:bs) + EQ -> ascending a as bs + LT -> ascending b (\ys -> as (a:ys)) bs + + mergeAll [x] = x + mergeAll xs = mergeAll (mergePairs xs) + + mergePairs (a:b:xs) = let !x = merge a b + in x : mergePairs xs + mergePairs xs = xs + + merge as@(a:as') bs@(b:bs') = + case cmp a b of + GT -> b:merge as bs' + EQ -> a:merge as' bs' + LT -> a:merge as' bs + merge [] bs = bs + merge as [] = as + + +-- The following function does case-insensitive comparison of sequences. +-- This is used to allow case-insensitive parsing, while +-- the linearizer still has access to the original cases. +compareCaseInsensitve s1 s2 = + case compareSeq (elems s1) (elems s2) of + (EQ,c) -> c + (c, _) -> c + where + compareSeq [] [] = dup EQ + compareSeq [] _ = dup LT + compareSeq _ [] = dup GT + compareSeq (x:xs) (y:ys) = + case compareSym x y of + (EQ,EQ) -> compareSeq xs ys + (EQ,c2) -> case compareSeq xs ys of + (c1,_) -> (c1,c2) + x -> x + + compareSym s1 s2 = + case s1 of + SymCat d1 r1 + -> case s2 of + SymCat d2 r2 + -> case compare d1 d2 of + EQ -> dup (r1 `compare` r2) + x -> dup x + _ -> dup LT + SymLit d1 r1 + -> case s2 of + SymCat {} -> dup GT + SymLit d2 r2 + -> case compare d1 d2 of + EQ -> dup (r1 `compare` r2) + x -> dup x + _ -> dup LT + SymVar d1 r1 + -> if tagToEnum# (getTag s2 ># 2#) + then dup LT + else case s2 of + SymVar d2 r2 + -> case compare d1 d2 of + EQ -> dup (r1 `compare` r2) + x -> dup x + _ -> dup GT + SymKS t1 + -> if tagToEnum# (getTag s2 ># 3#) + then dup LT + else case s2 of + SymKS t2 -> t1 `compareToken` t2 + _ -> dup GT + SymKP a1 b1 + -> if tagToEnum# (getTag s2 ># 4#) + then dup LT + else case s2 of + SymKP a2 b2 + -> case compare a1 a2 of + EQ -> dup (b1 `compare` b2) + x -> dup x + _ -> dup GT + _ -> let t1 = getTag s1 + t2 = getTag s2 + in if tagToEnum# (t1 <# t2) + then dup LT + else if tagToEnum# (t1 ==# t2) + then dup EQ + else dup GT + + compareToken [] [] = dup EQ + compareToken [] _ = dup LT + compareToken _ [] = dup GT + compareToken (x:xs) (y:ys) + | x == y = compareToken xs ys + | otherwise = case compare (toLower x) (toLower y) of + EQ -> case compareToken xs ys of + (c,_) -> (c,compare x y) + c -> dup c + + dup x = (x,x) diff --git a/src/runtime/haskell/PGF/Optimize.hs b/src/runtime/haskell/PGF/Optimize.hs index d3fb9290e..451955647 100644 --- a/src/runtime/haskell/PGF/Optimize.hs +++ b/src/runtime/haskell/PGF/Optimize.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE BangPatterns, FlexibleContexts #-} module PGF.Optimize ( optimizePGF , updateProductionIndices @@ -44,9 +44,9 @@ topDownFilter startCat cnc = env2 (productions cnc) cats = Map.mapWithKey filterCatLabels (cnccats cnc) - (seqs,funs) = env3 - in cnc{ sequences = mkSetArray seqs - , cncfuns = mkSetArray funs + (seqs,funs) = reorderSeqs env3 + in cnc{ sequences = seqs + , cncfuns = funs , productions = prods , cnccats = cats , lindefs = defs @@ -171,7 +171,18 @@ topDownFilter startCat cnc = in CncCat start end lbls' Nothing -> error "unknown category" - mkSetArray map = array (0,Map.size map-1) [(v,k) | (k,v) <- Map.toList map] + reorderSeqs (seqs,funs) = (seqs',funs') + where + sorted = sortNubBy ciCmp (Map.toList seqs) + seqs' = mkArray (map fst sorted) + re = array (0,Map.size seqs-1) (zipWith (\(_,i) j -> (i,j)) sorted [0..]) :: Array LIndex LIndex + funs' = array (0,Map.size funs-1) [(v,CncFun fun (amap ((!) re) lins)) | (CncFun fun lins,v) <- Map.toList funs] + + ciCmp (s1,_) (s2,_) + | Map.lookup (mkCId "case_sensitive") (cflags cnc) == Just (LStr "on") + = compare s1 s2 + | otherwise = compareCaseInsensitve s1 s2 + mkArray lst = listArray (0,length lst-1) lst mapAccumLSet f b set = let (b',lst) = mapAccumL f b (Set.toList set)