mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-22 19:22:50 -06:00
restore the sequence ordering after -optimize-pgf
This commit is contained in:
@@ -1,4 +1,4 @@
|
|||||||
{-# LANGUAGE BangPatterns, FlexibleContexts, MagicHash #-}
|
{-# LANGUAGE BangPatterns, FlexibleContexts #-}
|
||||||
module GF.Compile.GrammarToPGF (mkCanon2pgf) where
|
module GF.Compile.GrammarToPGF (mkCanon2pgf) where
|
||||||
|
|
||||||
--import GF.Compile.Export
|
--import GF.Compile.Export
|
||||||
@@ -8,16 +8,13 @@ import GF.Compile.GenerateBC
|
|||||||
import PGF(CId,mkCId,utf8CId)
|
import PGF(CId,mkCId,utf8CId)
|
||||||
import PGF.Internal(fidInt,fidFloat,fidString,fidVar)
|
import PGF.Internal(fidInt,fidFloat,fidString,fidVar)
|
||||||
import PGF.Internal(updateProductionIndices)
|
import PGF.Internal(updateProductionIndices)
|
||||||
--import qualified PGF.Macros as CM
|
|
||||||
import qualified PGF.Internal as C
|
import qualified PGF.Internal as C
|
||||||
import qualified PGF.Internal as D
|
import qualified PGF.Internal as D
|
||||||
import GF.Grammar.Predef
|
import GF.Grammar.Predef
|
||||||
--import GF.Grammar.Printer
|
|
||||||
import GF.Grammar.Grammar
|
import GF.Grammar.Grammar
|
||||||
import qualified GF.Grammar.Lookup as Look
|
import qualified GF.Grammar.Lookup as Look
|
||||||
import qualified GF.Grammar as A
|
import qualified GF.Grammar as A
|
||||||
import qualified GF.Grammar.Macros as GM
|
import qualified GF.Grammar.Macros as GM
|
||||||
--import GF.Compile.GeneratePMCFG
|
|
||||||
|
|
||||||
import GF.Infra.Ident
|
import GF.Infra.Ident
|
||||||
import GF.Infra.Option
|
import GF.Infra.Option
|
||||||
@@ -30,9 +27,6 @@ import qualified Data.Map as Map
|
|||||||
import qualified Data.IntMap as IntMap
|
import qualified Data.IntMap as IntMap
|
||||||
import Data.Array.IArray
|
import Data.Array.IArray
|
||||||
|
|
||||||
import Data.Char
|
|
||||||
import GHC.Prim
|
|
||||||
import GHC.Base(getTag)
|
|
||||||
|
|
||||||
mkCanon2pgf :: Options -> SourceGrammar -> ModuleName -> IOE D.PGF
|
mkCanon2pgf :: Options -> SourceGrammar -> ModuleName -> IOE D.PGF
|
||||||
mkCanon2pgf opts gr am = do
|
mkCanon2pgf opts gr am = do
|
||||||
@@ -65,7 +59,7 @@ mkCanon2pgf opts gr am = do
|
|||||||
mkConcr cm = do
|
mkConcr cm = do
|
||||||
let cflags = err (const noOptions) mflags (lookupModule gr cm)
|
let cflags = err (const noOptions) mflags (lookupModule gr cm)
|
||||||
ciCmp | flag optCaseSensitive cflags = compare
|
ciCmp | flag optCaseSensitive cflags = compare
|
||||||
| otherwise = compareCaseInsensitve
|
| otherwise = C.compareCaseInsensitve
|
||||||
|
|
||||||
(ex_seqs,cdefs) <- addMissingPMCFGs
|
(ex_seqs,cdefs) <- addMissingPMCFGs
|
||||||
Map.empty
|
Map.empty
|
||||||
@@ -74,7 +68,7 @@ mkCanon2pgf opts gr am = do
|
|||||||
|
|
||||||
let flags = Map.fromList [(mkCId f,x) | (f,x) <- optionsPGF cflags]
|
let flags = Map.fromList [(mkCId f,x) | (f,x) <- optionsPGF cflags]
|
||||||
|
|
||||||
seqs = (mkArray . sortNubBy ciCmp . concat) $
|
seqs = (mkArray . C.sortNubBy ciCmp . concat) $
|
||||||
(Map.keys ex_seqs : [maybe [] elems (mseqs mi) | (m,mi) <- allExtends gr cm])
|
(Map.keys ex_seqs : [maybe [] elems (mseqs mi) | (m,mi) <- allExtends gr cm])
|
||||||
|
|
||||||
ex_seqs_arr = mkMapArray ex_seqs :: Array SeqId Sequence
|
ex_seqs_arr = mkMapArray ex_seqs :: Array SeqId Sequence
|
||||||
@@ -312,124 +306,3 @@ genPrintNames cdefs =
|
|||||||
|
|
||||||
mkArray lst = listArray (0,length lst-1) lst
|
mkArray lst = listArray (0,length lst-1) lst
|
||||||
mkMapArray map = array (0,Map.size map-1) [(v,k) | (k,v) <- Map.toList map]
|
mkMapArray map = array (0,Map.size map-1) [(v,k) | (k,v) <- Map.toList map]
|
||||||
|
|
||||||
-- The following is a version of Data.List.sortBy which together
|
|
||||||
-- with the sorting also eliminates duplicate values
|
|
||||||
sortNubBy cmp = mergeAll . sequences
|
|
||||||
where
|
|
||||||
sequences (a:b:xs) =
|
|
||||||
case cmp a b of
|
|
||||||
GT -> descending b [a] xs
|
|
||||||
EQ -> sequences (b:xs)
|
|
||||||
LT -> ascending b (a:) xs
|
|
||||||
sequences xs = [xs]
|
|
||||||
|
|
||||||
descending a as [] = [a:as]
|
|
||||||
descending a as (b:bs) =
|
|
||||||
case cmp a b of
|
|
||||||
GT -> descending b (a:as) bs
|
|
||||||
EQ -> descending a as bs
|
|
||||||
LT -> (a:as) : sequences (b:bs)
|
|
||||||
|
|
||||||
ascending a as [] = let !x = as [a]
|
|
||||||
in [x]
|
|
||||||
ascending a as (b:bs) =
|
|
||||||
case cmp a b of
|
|
||||||
GT -> let !x = as [a]
|
|
||||||
in x : sequences (b:bs)
|
|
||||||
EQ -> ascending a as bs
|
|
||||||
LT -> ascending b (\ys -> as (a:ys)) bs
|
|
||||||
|
|
||||||
mergeAll [x] = x
|
|
||||||
mergeAll xs = mergeAll (mergePairs xs)
|
|
||||||
|
|
||||||
mergePairs (a:b:xs) = let !x = merge a b
|
|
||||||
in x : mergePairs xs
|
|
||||||
mergePairs xs = xs
|
|
||||||
|
|
||||||
merge as@(a:as') bs@(b:bs') =
|
|
||||||
case cmp a b of
|
|
||||||
GT -> b:merge as bs'
|
|
||||||
EQ -> a:merge as' bs'
|
|
||||||
LT -> a:merge as' bs
|
|
||||||
merge [] bs = bs
|
|
||||||
merge as [] = as
|
|
||||||
|
|
||||||
-- The following function does case-insensitive comparison of sequences.
|
|
||||||
-- This is used to allow case-insensitive parsing, while
|
|
||||||
-- the linearizer still has access to the original cases.
|
|
||||||
compareCaseInsensitve s1 s2 =
|
|
||||||
case compareSeq (elems s1) (elems s2) of
|
|
||||||
(EQ,c) -> c
|
|
||||||
(c, _) -> c
|
|
||||||
where
|
|
||||||
compareSeq [] [] = dup EQ
|
|
||||||
compareSeq [] _ = dup LT
|
|
||||||
compareSeq _ [] = dup GT
|
|
||||||
compareSeq (x:xs) (y:ys) =
|
|
||||||
case compareSym x y of
|
|
||||||
(EQ,EQ) -> compareSeq xs ys
|
|
||||||
(EQ,c2) -> case compareSeq xs ys of
|
|
||||||
(c1,_) -> (c1,c2)
|
|
||||||
x -> x
|
|
||||||
|
|
||||||
compareSym s1 s2 =
|
|
||||||
case s1 of
|
|
||||||
D.SymCat d1 r1
|
|
||||||
-> case s2 of
|
|
||||||
D.SymCat d2 r2
|
|
||||||
-> case compare d1 d2 of
|
|
||||||
EQ -> dup (r1 `compare` r2)
|
|
||||||
x -> dup x
|
|
||||||
_ -> dup LT
|
|
||||||
D.SymLit d1 r1
|
|
||||||
-> case s2 of
|
|
||||||
D.SymCat {} -> dup GT
|
|
||||||
D.SymLit d2 r2
|
|
||||||
-> case compare d1 d2 of
|
|
||||||
EQ -> dup (r1 `compare` r2)
|
|
||||||
x -> dup x
|
|
||||||
_ -> dup LT
|
|
||||||
D.SymVar d1 r1
|
|
||||||
-> if tagToEnum# (getTag s2 ># 2#)
|
|
||||||
then dup LT
|
|
||||||
else case s2 of
|
|
||||||
D.SymVar d2 r2
|
|
||||||
-> case compare d1 d2 of
|
|
||||||
EQ -> dup (r1 `compare` r2)
|
|
||||||
x -> dup x
|
|
||||||
_ -> dup GT
|
|
||||||
D.SymKS t1
|
|
||||||
-> if tagToEnum# (getTag s2 ># 3#)
|
|
||||||
then dup LT
|
|
||||||
else case s2 of
|
|
||||||
D.SymKS t2 -> t1 `compareToken` t2
|
|
||||||
_ -> dup GT
|
|
||||||
D.SymKP a1 b1
|
|
||||||
-> if tagToEnum# (getTag s2 ># 4#)
|
|
||||||
then dup LT
|
|
||||||
else case s2 of
|
|
||||||
D.SymKP a2 b2
|
|
||||||
-> case compare a1 a2 of
|
|
||||||
EQ -> dup (b1 `compare` b2)
|
|
||||||
x -> dup x
|
|
||||||
_ -> dup GT
|
|
||||||
_ -> let t1 = getTag s1
|
|
||||||
t2 = getTag s2
|
|
||||||
in if tagToEnum# (t1 <# t2)
|
|
||||||
then dup LT
|
|
||||||
else if tagToEnum# (t1 ==# t2)
|
|
||||||
then dup EQ
|
|
||||||
else dup GT
|
|
||||||
|
|
||||||
compareToken [] [] = dup EQ
|
|
||||||
compareToken [] _ = dup LT
|
|
||||||
compareToken _ [] = dup GT
|
|
||||||
compareToken (x:xs) (y:ys)
|
|
||||||
| x == y = compareToken xs ys
|
|
||||||
| otherwise = case compare (toLower x) (toLower y) of
|
|
||||||
EQ -> case compareToken xs ys of
|
|
||||||
(c,_) -> (c,compare x y)
|
|
||||||
c -> dup c
|
|
||||||
|
|
||||||
dup x = (x,x)
|
|
||||||
|
|||||||
@@ -1,3 +1,4 @@
|
|||||||
|
{-# LANGUAGE MagicHash, BangPatterns, FlexibleContexts #-}
|
||||||
module PGF.Macros where
|
module PGF.Macros where
|
||||||
import Prelude hiding ((<>)) -- GHC 8.4.1 clash with Text.PrettyPrint
|
import Prelude hiding ((<>)) -- GHC 8.4.1 clash with Text.PrettyPrint
|
||||||
|
|
||||||
@@ -13,6 +14,9 @@ import qualified Data.Array as Array
|
|||||||
import Data.List
|
import Data.List
|
||||||
import Data.Array.IArray
|
import Data.Array.IArray
|
||||||
import Text.PrettyPrint
|
import Text.PrettyPrint
|
||||||
|
import GHC.Prim
|
||||||
|
import GHC.Base(getTag)
|
||||||
|
import Data.Char
|
||||||
|
|
||||||
-- operations for manipulating PGF grammars and objects
|
-- operations for manipulating PGF grammars and objects
|
||||||
|
|
||||||
@@ -241,3 +245,126 @@ computeSeq filter seq args = concatMap compute seq
|
|||||||
flattenBracketedString :: BracketedString -> [String]
|
flattenBracketedString :: BracketedString -> [String]
|
||||||
flattenBracketedString (Leaf w) = [w]
|
flattenBracketedString (Leaf w) = [w]
|
||||||
flattenBracketedString (Bracket _ _ _ _ _ _ bss) = concatMap flattenBracketedString bss
|
flattenBracketedString (Bracket _ _ _ _ _ _ bss) = concatMap flattenBracketedString bss
|
||||||
|
|
||||||
|
|
||||||
|
-- The following is a version of Data.List.sortBy which together
|
||||||
|
-- with the sorting also eliminates duplicate values
|
||||||
|
sortNubBy cmp = mergeAll . sequences
|
||||||
|
where
|
||||||
|
sequences (a:b:xs) =
|
||||||
|
case cmp a b of
|
||||||
|
GT -> descending b [a] xs
|
||||||
|
EQ -> sequences (b:xs)
|
||||||
|
LT -> ascending b (a:) xs
|
||||||
|
sequences xs = [xs]
|
||||||
|
|
||||||
|
descending a as [] = [a:as]
|
||||||
|
descending a as (b:bs) =
|
||||||
|
case cmp a b of
|
||||||
|
GT -> descending b (a:as) bs
|
||||||
|
EQ -> descending a as bs
|
||||||
|
LT -> (a:as) : sequences (b:bs)
|
||||||
|
|
||||||
|
ascending a as [] = let !x = as [a]
|
||||||
|
in [x]
|
||||||
|
ascending a as (b:bs) =
|
||||||
|
case cmp a b of
|
||||||
|
GT -> let !x = as [a]
|
||||||
|
in x : sequences (b:bs)
|
||||||
|
EQ -> ascending a as bs
|
||||||
|
LT -> ascending b (\ys -> as (a:ys)) bs
|
||||||
|
|
||||||
|
mergeAll [x] = x
|
||||||
|
mergeAll xs = mergeAll (mergePairs xs)
|
||||||
|
|
||||||
|
mergePairs (a:b:xs) = let !x = merge a b
|
||||||
|
in x : mergePairs xs
|
||||||
|
mergePairs xs = xs
|
||||||
|
|
||||||
|
merge as@(a:as') bs@(b:bs') =
|
||||||
|
case cmp a b of
|
||||||
|
GT -> b:merge as bs'
|
||||||
|
EQ -> a:merge as' bs'
|
||||||
|
LT -> a:merge as' bs
|
||||||
|
merge [] bs = bs
|
||||||
|
merge as [] = as
|
||||||
|
|
||||||
|
|
||||||
|
-- The following function does case-insensitive comparison of sequences.
|
||||||
|
-- This is used to allow case-insensitive parsing, while
|
||||||
|
-- the linearizer still has access to the original cases.
|
||||||
|
compareCaseInsensitve s1 s2 =
|
||||||
|
case compareSeq (elems s1) (elems s2) of
|
||||||
|
(EQ,c) -> c
|
||||||
|
(c, _) -> c
|
||||||
|
where
|
||||||
|
compareSeq [] [] = dup EQ
|
||||||
|
compareSeq [] _ = dup LT
|
||||||
|
compareSeq _ [] = dup GT
|
||||||
|
compareSeq (x:xs) (y:ys) =
|
||||||
|
case compareSym x y of
|
||||||
|
(EQ,EQ) -> compareSeq xs ys
|
||||||
|
(EQ,c2) -> case compareSeq xs ys of
|
||||||
|
(c1,_) -> (c1,c2)
|
||||||
|
x -> x
|
||||||
|
|
||||||
|
compareSym s1 s2 =
|
||||||
|
case s1 of
|
||||||
|
SymCat d1 r1
|
||||||
|
-> case s2 of
|
||||||
|
SymCat d2 r2
|
||||||
|
-> case compare d1 d2 of
|
||||||
|
EQ -> dup (r1 `compare` r2)
|
||||||
|
x -> dup x
|
||||||
|
_ -> dup LT
|
||||||
|
SymLit d1 r1
|
||||||
|
-> case s2 of
|
||||||
|
SymCat {} -> dup GT
|
||||||
|
SymLit d2 r2
|
||||||
|
-> case compare d1 d2 of
|
||||||
|
EQ -> dup (r1 `compare` r2)
|
||||||
|
x -> dup x
|
||||||
|
_ -> dup LT
|
||||||
|
SymVar d1 r1
|
||||||
|
-> if tagToEnum# (getTag s2 ># 2#)
|
||||||
|
then dup LT
|
||||||
|
else case s2 of
|
||||||
|
SymVar d2 r2
|
||||||
|
-> case compare d1 d2 of
|
||||||
|
EQ -> dup (r1 `compare` r2)
|
||||||
|
x -> dup x
|
||||||
|
_ -> dup GT
|
||||||
|
SymKS t1
|
||||||
|
-> if tagToEnum# (getTag s2 ># 3#)
|
||||||
|
then dup LT
|
||||||
|
else case s2 of
|
||||||
|
SymKS t2 -> t1 `compareToken` t2
|
||||||
|
_ -> dup GT
|
||||||
|
SymKP a1 b1
|
||||||
|
-> if tagToEnum# (getTag s2 ># 4#)
|
||||||
|
then dup LT
|
||||||
|
else case s2 of
|
||||||
|
SymKP a2 b2
|
||||||
|
-> case compare a1 a2 of
|
||||||
|
EQ -> dup (b1 `compare` b2)
|
||||||
|
x -> dup x
|
||||||
|
_ -> dup GT
|
||||||
|
_ -> let t1 = getTag s1
|
||||||
|
t2 = getTag s2
|
||||||
|
in if tagToEnum# (t1 <# t2)
|
||||||
|
then dup LT
|
||||||
|
else if tagToEnum# (t1 ==# t2)
|
||||||
|
then dup EQ
|
||||||
|
else dup GT
|
||||||
|
|
||||||
|
compareToken [] [] = dup EQ
|
||||||
|
compareToken [] _ = dup LT
|
||||||
|
compareToken _ [] = dup GT
|
||||||
|
compareToken (x:xs) (y:ys)
|
||||||
|
| x == y = compareToken xs ys
|
||||||
|
| otherwise = case compare (toLower x) (toLower y) of
|
||||||
|
EQ -> case compareToken xs ys of
|
||||||
|
(c,_) -> (c,compare x y)
|
||||||
|
c -> dup c
|
||||||
|
|
||||||
|
dup x = (x,x)
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
{-# LANGUAGE BangPatterns #-}
|
{-# LANGUAGE BangPatterns, FlexibleContexts #-}
|
||||||
module PGF.Optimize
|
module PGF.Optimize
|
||||||
( optimizePGF
|
( optimizePGF
|
||||||
, updateProductionIndices
|
, updateProductionIndices
|
||||||
@@ -44,9 +44,9 @@ topDownFilter startCat cnc =
|
|||||||
env2
|
env2
|
||||||
(productions cnc)
|
(productions cnc)
|
||||||
cats = Map.mapWithKey filterCatLabels (cnccats cnc)
|
cats = Map.mapWithKey filterCatLabels (cnccats cnc)
|
||||||
(seqs,funs) = env3
|
(seqs,funs) = reorderSeqs env3
|
||||||
in cnc{ sequences = mkSetArray seqs
|
in cnc{ sequences = seqs
|
||||||
, cncfuns = mkSetArray funs
|
, cncfuns = funs
|
||||||
, productions = prods
|
, productions = prods
|
||||||
, cnccats = cats
|
, cnccats = cats
|
||||||
, lindefs = defs
|
, lindefs = defs
|
||||||
@@ -171,7 +171,18 @@ topDownFilter startCat cnc =
|
|||||||
in CncCat start end lbls'
|
in CncCat start end lbls'
|
||||||
Nothing -> error "unknown category"
|
Nothing -> error "unknown category"
|
||||||
|
|
||||||
mkSetArray map = array (0,Map.size map-1) [(v,k) | (k,v) <- Map.toList map]
|
reorderSeqs (seqs,funs) = (seqs',funs')
|
||||||
|
where
|
||||||
|
sorted = sortNubBy ciCmp (Map.toList seqs)
|
||||||
|
seqs' = mkArray (map fst sorted)
|
||||||
|
re = array (0,Map.size seqs-1) (zipWith (\(_,i) j -> (i,j)) sorted [0..]) :: Array LIndex LIndex
|
||||||
|
funs' = array (0,Map.size funs-1) [(v,CncFun fun (amap ((!) re) lins)) | (CncFun fun lins,v) <- Map.toList funs]
|
||||||
|
|
||||||
|
ciCmp (s1,_) (s2,_)
|
||||||
|
| Map.lookup (mkCId "case_sensitive") (cflags cnc) == Just (LStr "on")
|
||||||
|
= compare s1 s2
|
||||||
|
| otherwise = compareCaseInsensitve s1 s2
|
||||||
|
|
||||||
mkArray lst = listArray (0,length lst-1) lst
|
mkArray lst = listArray (0,length lst-1) lst
|
||||||
|
|
||||||
mapAccumLSet f b set = let (b',lst) = mapAccumL f b (Set.toList set)
|
mapAccumLSet f b set = let (b',lst) = mapAccumL f b (Set.toList set)
|
||||||
|
|||||||
Reference in New Issue
Block a user