restore the sharing of sequences. Shrinks the grammar by ~45%

This commit is contained in:
Krasimir Angelov
2022-01-08 19:49:42 +01:00
parent cd2c6aa32a
commit 00f857559d
31 changed files with 882 additions and 353 deletions

View File

@@ -22,77 +22,99 @@ import GF.Text.Pretty
import GF.Compile.Compute.Concrete
import GF.Data.Operations(Err(..))
import PGF2.Transactions
import qualified Data.Map.Strict as Map
import Control.Monad
import Data.List(mapAccumL,sortBy)
import Data.Maybe(fromMaybe)
import Control.Monad.State
import qualified Data.Map.Strict as Map
import qualified Data.Sequence as Seq
import Data.List(mapAccumL,sortOn)
import Data.Maybe(fromMaybe,isNothing)
generatePMCFG :: Options -> FilePath -> SourceGrammar -> SourceModule -> Check SourceModule
generatePMCFG opts cwd gr cmo@(cm,cmi)
| mstatus cmi == MSComplete && isModCnc cmi =
| mstatus cmi == MSComplete && isModCnc cmi && isNothing (mseqs cmi) =
do let gr' = prependModule gr cmo
js <- mapM (addPMCFG opts cwd gr' cmi) (Map.toList (jments cmi))
return (cm,cmi{jments = (Map.fromAscList js)})
(js,seqs) <- runStateT (Map.traverseWithKey (\id info -> StateT (addPMCFG opts cwd gr' cmi id info)) (jments cmi)) Map.empty
return (cm,cmi{jments = js, mseqs=Just (mapToSequence seqs)})
| otherwise = return cmo
where
mapToSequence m = Seq.fromList (map fst (sortOn snd (Map.toList m)))
addPMCFG opts cwd gr cmi (id,CncCat mty@(Just (L loc ty)) mdef mref mprn Nothing) = do
defs <- case mdef of
type SequenceSet = Map.Map [Symbol] Int
addPMCFG opts cwd gr cmi id (CncCat mty@(Just (L loc ty)) mdef mref mprn Nothing) seqs = do
(defs,seqs) <-
case mdef of
Nothing -> checkInModule cwd cmi loc ("Happened in the PMCFG generation for the lindef of" <+> id) $ do
term <- mkLinDefault gr ty
pmcfgForm gr term [(Explicit,identW,typeStr)] ty
pmcfgForm gr term [(Explicit,identW,typeStr)] ty seqs
Just (L loc term) -> checkInModule cwd cmi loc ("Happened in the PMCFG generation for the lindef of" <+> id) $ do
pmcfgForm gr term [(Explicit,identW,typeStr)] ty
refs <- case mref of
pmcfgForm gr term [(Explicit,identW,typeStr)] ty seqs
(refs,seqs) <-
case mref of
Nothing -> checkInModule cwd cmi loc ("Happened in the PMCFG generation for the linref of" <+> id) $ do
term <- mkLinReference gr ty
pmcfgForm gr term [(Explicit,identW,ty)] typeStr
pmcfgForm gr term [(Explicit,identW,ty)] typeStr seqs
Just (L loc term) -> checkInModule cwd cmi loc ("Happened in the PMCFG generation for the linref of" <+> id) $ do
pmcfgForm gr term [(Explicit,identW,ty)] typeStr
pmcfgForm gr term [(Explicit,identW,ty)] typeStr seqs
mprn <- case mprn of
Nothing -> return Nothing
Just (L loc prn) -> checkInModule cwd cmi loc ("Happened in the computation of the print name for" <+> id) $ do
prn <- normalForm gr prn
return (Just (L loc prn))
return (id,CncCat mty mdef mref mprn (Just (defs,refs)))
addPMCFG opts cwd gr cmi (id,CncFun mty@(Just (_,cat,ctxt,val)) mlin@(Just (L loc term)) mprn Nothing) = do
rules <- checkInModule cwd cmi loc ("Happened in the PMCFG generation for" <+> id) $
pmcfgForm gr term ctxt val
return (CncCat mty mdef mref mprn (Just (defs,refs)),seqs)
addPMCFG opts cwd gr cmi id (CncFun mty@(Just (_,cat,ctxt,val)) mlin@(Just (L loc term)) mprn Nothing) seqs = do
(rules,seqs) <-
checkInModule cwd cmi loc ("Happened in the PMCFG generation for" <+> id) $
pmcfgForm gr term ctxt val seqs
mprn <- case mprn of
Nothing -> return Nothing
Just (L loc prn) -> checkInModule cwd cmi loc ("Happened in the computation of the print name for" <+> id) $ do
prn <- normalForm gr prn
return (Just (L loc prn))
return (id,CncFun mty mlin mprn (Just rules))
addPMCFG opts cwd gr cmi id_info = return id_info
return (CncFun mty mlin mprn (Just rules),seqs)
addPMCFG opts cwd gr cmi id info seqs = return (info,seqs)
pmcfgForm :: Grammar -> Term -> Context -> Type -> Check [Production]
pmcfgForm gr t ctxt ty =
runEvalM gr $ do
((_,ms),args) <- mapAccumM (\(d,ms) (_,_,ty) -> do
let (ms',_,t) = type2metaTerm gr d ms 0 [] ty
tnk <- newThunk [] t
return ((d+1,ms'),tnk))
(0,Map.empty) ctxt
sequence_ [newNarrowing i ty | (i,ty) <- Map.toList ms]
v <- eval [] t args
(lins,params) <- flatten v ty ([],[])
lins <- mapM str2lin lins
(r,rs,_) <- compute params
args <- zipWithM tnk2lparam args ctxt
vars <- getVariables
return (Production vars args (LParam r (order rs)) (reverse lins))
where
tnk2lparam tnk (_,_,ty) = do
v <- force tnk
(_,params) <- flatten v ty ([],[])
(r,rs,_) <- compute params
return (PArg [] (LParam r (order rs)))
pmcfgForm :: Grammar -> Term -> Context -> Type -> SequenceSet -> Check ([Production],SequenceSet)
pmcfgForm gr t ctxt ty seqs = do
res <- runEvalM gr $ do
((_,ms),args) <- mapAccumM (\(d,ms) (_,_,ty) -> do
let (ms',_,t) = type2metaTerm gr d ms 0 [] ty
tnk <- newThunk [] t
return ((d+1,ms'),tnk))
(0,Map.empty) ctxt
sequence_ [newNarrowing i ty | (i,ty) <- Map.toList ms]
v <- eval [] t args
(lins,params) <- flatten v ty ([],[])
lins <- fmap reverse $ mapM str2lin lins
(r,rs,_) <- compute params
args <- zipWithM tnk2lparam args ctxt
vars <- getVariables
let res = LParam r (order rs)
return (vars,args,res,lins)
return (runState (mapM mkProduction res) seqs)
where
tnk2lparam tnk (_,_,ty) = do
v <- force tnk
(_,params) <- flatten v ty ([],[])
(r,rs,_) <- compute params
return (PArg [] (LParam r (order rs)))
compute [] = return (0,[],1)
compute ((v,ty):params) = do
(r, rs ,cnt ) <- param2int v ty
(r',rs',cnt') <- compute params
return (r*cnt'+r',combine cnt' rs rs',cnt*cnt')
compute [] = return (0,[],1)
compute ((v,ty):params) = do
(r, rs ,cnt ) <- param2int v ty
(r',rs',cnt') <- compute params
return (r*cnt'+r',combine cnt' rs rs',cnt*cnt')
mkProduction (vars,args,res,lins) = do
lins <- mapM getSeqId lins
return (Production vars args res lins)
where
getSeqId :: [Symbol] -> State (Map.Map [Symbol] SeqId) SeqId
getSeqId lin = state $ \m ->
case Map.lookup lin m of
Just seqid -> (seqid,m)
Nothing -> let seqid = Map.size m
in (seqid,Map.insert lin seqid m)
type2metaTerm :: SourceGrammar -> Int -> Map.Map MetaId Type -> LIndex -> [(LIndex,(Ident,Type))] -> Type -> (Map.Map MetaId Type,Int,Term)
type2metaTerm gr d ms r rs (Sort s) | s == cStr =
@@ -238,7 +260,7 @@ combine cnt' ((r,pv):rs) ((r',pv'):rs') =
EQ -> (r*cnt'+r',pv ) : combine cnt' rs ((r',pv'):rs')
GT -> ( r',pv') : combine cnt' ((r,pv):rs) rs'
order = sortBy (\(r1,_) (r2,_) -> compare r2 r1)
order = sortOn fst
mapAccumM f a [] = return (a,[])
mapAccumM f a (x:xs) = do (a, y) <- f a x

View File

@@ -18,12 +18,13 @@ import GF.Infra.Option
import GF.Infra.UseIO (IOE)
import GF.Data.Operations
import Control.Monad(forM_)
import Control.Monad(forM_,foldM)
import Data.List
import Data.Char
import qualified Data.Set as Set
import qualified Data.Map as Map
import qualified Data.IntMap as IntMap
import qualified Data.Sequence as Seq
import Data.Array.IArray
import Data.Maybe(fromMaybe)
import System.FilePath
@@ -56,14 +57,18 @@ grammar2PGF opts mb_pgf gr am probs = do
createConcrete (mi2i cm) $ do
let cflags = err (const noOptions) mflags (lookupModule gr cm)
sequence_ [setConcreteFlag name value | (name,value) <- optionsPGF cflags]
let id_prod = Production [] [PArg [] (LParam 0 [])] (LParam 0 []) [[SymCat 0 (LParam 0 [])]]
prods = ([id_prod],[id_prod])
infos = (((cPredefAbs,cInt), CncCat (Just (noLoc GM.defLinType)) Nothing Nothing Nothing (Just prods))
:((cPredefAbs,cString),CncCat (Just (noLoc GM.defLinType)) Nothing Nothing Nothing (Just prods))
:((cPredefAbs,cFloat), CncCat (Just (noLoc GM.defLinType)) Nothing Nothing Nothing (Just prods))
: Look.allOrigInfos gr cm)
forM_ infos createCncCats
forM_ infos createCncFuns
let infos = ( Seq.fromList [Left [SymCat 0 (LParam 0 [])]]
, let id_prod = Production [] [PArg [] (LParam 0 [])] (LParam 0 []) [0]
prods = ([id_prod],[id_prod])
in [(cInt, CncCat (Just (noLoc GM.defLinType)) Nothing Nothing Nothing (Just prods))
,(cString,CncCat (Just (noLoc GM.defLinType)) Nothing Nothing Nothing (Just prods))
,(cFloat, CncCat (Just (noLoc GM.defLinType)) Nothing Nothing Nothing (Just prods))
]
)
: prepareSeqTbls (Look.allOrigInfos gr cm)
infos <- processInfos createCncCats infos
infos <- processInfos createCncFuns infos
return ()
return pgf
where
aflags = err (const noOptions) mflags (lookupModule gr am)
@@ -95,19 +100,38 @@ grammar2PGF opts mb_pgf gr am probs = do
0 -> 0
n -> max 0 ((1 - sum [d | (f,Just d) <- pfs]) / fromIntegral n)
createCncCats ((m,c),CncCat (Just (L _ ty)) _ _ mprn (Just (lindefs,linrefs))) = do
createLincat (i2i c) (type2fields gr ty) lindefs linrefs
prepareSeqTbls infos =
(map addSeqTable . Map.toList . Map.fromListWith (++))
[(m,[(c,info)]) | ((m,c),info) <- infos]
where
addSeqTable (m,infos) =
case lookupModule gr m of
Ok mi -> case mseqs mi of
Just seqs -> (fmap Left seqs,infos)
Nothing -> (Seq.empty,[])
Bad msg -> error msg
processInfos f [] = return []
processInfos f ((seqtbl,infos):rest) = do
seqtbl <- foldM f seqtbl infos
rest <- processInfos f rest
return ((seqtbl,infos):rest)
createCncCats seqtbl (c,CncCat (Just (L _ ty)) _ _ mprn (Just (lindefs,linrefs))) = do
seqtbl <- createLincat (i2i c) (type2fields gr ty) lindefs linrefs seqtbl
case mprn of
Nothing -> return ()
Just (L _ prn) -> setPrintName (i2i c) (unwords (term2tokens prn))
createCncCats _ = return ()
return seqtbl
createCncCats seqtbl _ = return seqtbl
createCncFuns ((m,f),CncFun _ _ mprn (Just prods)) = do
createLin (i2i f) prods
createCncFuns seqtbl (f,CncFun _ _ mprn (Just prods)) = do
seqtbl <- createLin (i2i f) prods seqtbl
case mprn of
Nothing -> return ()
Just (L _ prn) -> setPrintName (i2i f) (unwords (term2tokens prn))
createCncFuns _ = return ()
return seqtbl
createCncFuns seqtbl _ = return seqtbl
term2tokens (K tok) = [tok]
term2tokens (C t1 t2) = term2tokens t1 ++ term2tokens t2

View File

@@ -78,7 +78,7 @@ extendModule cwd gr (name,m)
-- | rebuilding instance + interface, and "with" modules, prior to renaming.
-- AR 24/10/2003
rebuildModule :: FilePath -> SourceGrammar -> SourceModule -> Check SourceModule
rebuildModule cwd gr mo@(i,mi@(ModInfo mt stat fs_ me mw ops_ med_ msrc_ js_)) =
rebuildModule cwd gr mo@(i,mi@(ModInfo mt stat fs_ me mw ops_ med_ msrc_ mseqs js_)) =
checkInModule cwd mi NoLoc empty $ do
---- deps <- moduleDeps ms
@@ -115,7 +115,7 @@ rebuildModule cwd gr mo@(i,mi@(ModInfo mt stat fs_ me mw ops_ med_ msrc_ js_)) =
else MSIncomplete
unless (stat' == MSComplete || stat == MSIncomplete)
(checkError ("module" <+> i <+> "remains incomplete"))
ModInfo mt0 _ fs me' _ ops0 _ fpath js <- lookupModule gr ext
ModInfo mt0 _ fs me' _ ops0 _ fpath _ js <- lookupModule gr ext
let ops1 = nub $
ops_ ++ -- N.B. js has been name-resolved already
[OQualif i j | (i,j) <- ops] ++
@@ -131,7 +131,7 @@ rebuildModule cwd gr mo@(i,mi@(ModInfo mt stat fs_ me mw ops_ med_ msrc_ js_)) =
js
let js1 = Map.union js0 js_
let med1= nub (ext : infs ++ insts ++ med_)
return $ ModInfo mt0 stat' fs1 me Nothing ops1 med1 msrc_ js1
return $ ModInfo mt0 stat' fs1 me Nothing ops1 med1 msrc_ mseqs js1
return (i,mi')

View File

@@ -34,8 +34,8 @@ instance Binary Grammar where
instance Binary ModuleInfo where
put mi = do put (mtype mi,mstatus mi,mflags mi,mextend mi,mwith mi,mopens mi,mexdeps mi,msrc mi,jments mi)
get = do (mtype,mstatus,mflags,mextend,mwith,mopens,med,msrc,jments) <- get
return (ModInfo mtype mstatus mflags mextend mwith mopens med msrc jments)
get = do (mtype,mstatus,mflags,mextend,mwith,mopens,med,msrc,mseqs,jments) <- get
return (ModInfo mtype mstatus mflags mextend mwith mopens med msrc mseqs jments)
instance Binary ModuleType where
put MTAbstract = putWord8 0
@@ -365,7 +365,7 @@ decodeModuleHeader :: MonadIO io => FilePath -> io (VersionTagged Module)
decodeModuleHeader = liftIO . fmap (fmap conv) . decodeFile'
where
conv (m,mtype,mstatus,mflags,mextend,mwith,mopens,med,msrc) =
(m,ModInfo mtype mstatus mflags mextend mwith mopens med msrc Map.empty)
(m,ModInfo mtype mstatus mflags mextend mwith mopens med msrc Nothing Map.empty)
encodeModule :: MonadIO io => FilePath -> SourceModule -> io ()
encodeModule fpath mo = liftIO $ encodeFile fpath (Tagged mo)

View File

@@ -74,11 +74,12 @@ import GF.Infra.Location
import GF.Data.Operations
import PGF2(BindType(..))
import PGF2.Transactions(LIndex,LVar,LParam(..),PArg(..),Symbol(..),Production(..))
import PGF2.Transactions(SeqId,LIndex,LVar,LParam(..),PArg(..),Symbol(..),Production(..))
import Data.Array.IArray(Array)
import Data.Array.Unboxed(UArray)
import qualified Data.Map as Map
import qualified Data.Sequence as Seq
import GF.Text.Pretty
@@ -100,6 +101,7 @@ data ModuleInfo = ModInfo {
mopens :: [OpenSpec],
mexdeps :: [ModuleName],
msrc :: FilePath,
mseqs :: Maybe (Seq.Seq [Symbol]),
jments :: Map.Map Ident Info
}

View File

@@ -132,14 +132,14 @@ ModDef
(opens,jments,opts) = case content of { Just c -> c; Nothing -> ([],[],noOptions) }
jments <- mapM (checkInfoType mtype) jments
defs <- buildAnyTree id jments
return (id, ModInfo mtype mstat opts extends with opens [] "" defs) }
return (id, ModInfo mtype mstat opts extends with opens [] "" Nothing defs) }
ModHeader :: { SourceModule }
ModHeader
: ComplMod ModType '=' ModHeaderBody { let { mstat = $1 ;
(mtype,id) = $2 ;
(extends,with,opens) = $4 }
in (id, ModInfo mtype mstat noOptions extends with opens [] "" Map.empty) }
in (id, ModInfo mtype mstat noOptions extends with opens [] "" Nothing Map.empty) }
ComplMod :: { ModuleStatus }
ComplMod

View File

@@ -25,6 +25,7 @@ module GF.Grammar.Printer
import Prelude hiding ((<>)) -- GHC 8.4.1 clash with Text.PrettyPrint
import PGF2(Literal(..))
import PGF2.Transactions(SeqId)
import GF.Infra.Ident
import GF.Infra.Option
import GF.Grammar.Values
@@ -34,8 +35,9 @@ import GF.Grammar.Grammar
import GF.Text.Pretty
import Data.Maybe (isNothing)
import Data.List (intersperse)
import Data.Foldable (toList)
import qualified Data.Map as Map
import qualified Data.Array.IArray as Array
import qualified Data.Sequence as Seq
import qualified GHC.Show
data TermPrintQual
@@ -46,10 +48,11 @@ instance Pretty Grammar where
pp = vcat . map (ppModule Qualified) . modules
ppModule :: TermPrintQual -> SourceModule -> Doc
ppModule q (mn, ModInfo mtype mstat opts exts with opens _ _ jments) =
ppModule q (mn, ModInfo mtype mstat opts exts with opens _ _ mseqs jments) =
hdr $$
nest 2 (ppOptions opts $$
vcat (map (ppJudgement q) (Map.toList jments))) $$
vcat (map (ppJudgement q) (Map.toList jments)) $$
maybe empty (ppSequences q) mseqs) $$
ftr
where
hdr = complModDoc <+> modTypeDoc <+> '=' <+>
@@ -160,7 +163,7 @@ ppJudgement q (id, AnyInd cann mid) =
Internal -> "ind" <+> id <+> '=' <+> (if cann then pp "canonical" else empty) <+> mid <+> ';'
_ -> empty
ppPmcfgRule id arg_cats res_cat (Production vars args res lins) =
ppPmcfgRule id arg_cats res_cat (Production vars args res seqids) =
pp id <+> (':' <+>
(if null vars
then empty
@@ -169,7 +172,7 @@ ppPmcfgRule id arg_cats res_cat (Production vars args res lins) =
then empty
else hsep (intersperse (pp '*') (zipWith ppPArg arg_cats args)) <+> "->") <+>
ppPmcfgCat res_cat res $$
'=' <+> brackets (vcat (map (hsep . map ppSymbol) lins)))
'=' <+> brackets (hcat (intersperse (pp ',') (map ppSeqId seqids))))
ppPArg cat (PArg _ p) = ppPmcfgCat cat p
@@ -340,6 +343,18 @@ ppAltern q (x,y) = ppTerm q 0 x <+> '/' <+> ppTerm q 0 y
ppParams q ps = fsep (intersperse (pp '|') (map (ppParam q) ps))
ppParam q (id,cxt) = id <+> hsep (map (ppDDecl q) cxt)
ppSeqId :: SeqId -> Doc
ppSeqId seqid = 'S' <> pp seqid
ppSequences q seqs
| Seq.null seqs || q /= Internal = empty
| otherwise = "sequences" <+> '{' $$
nest 2 (vcat (zipWith ppSeq [0..] (toList seqs))) $$
'}'
where
ppSeq seqid seq =
ppSeqId seqid <+> ":=" <+> hsep (map ppSymbol seq)
commaPunct f ds = (hcat (punctuate "," (map f ds)))
prec d1 d2 doc

View File

@@ -28,8 +28,9 @@ import PGF2
import PGF2.Transactions hiding (modifyPGF,checkoutPGF)
import Data.Char
import Data.List(isPrefixOf)
import Data.List(isPrefixOf,sortOn)
import qualified Data.Map as Map
import qualified Data.Sequence as Seq
import qualified Text.ParserCombinators.ReadP as RP
import System.Directory(getAppUserDataDirectory)
import Control.Exception(SomeException,fromException,evaluate,try)
@@ -256,9 +257,9 @@ transactionCommand (CreateLin opts f t) pgf = do
in return (fields,type2term mo ty)
Nothing -> fail ("Function "++f++" is not in the abstract syntax")
case runCheck (compileLinTerm sgr mo t ty) of
Ok ((prods,fields'),_)
Ok ((prods,seqtbl,fields'),_)
| fields == fields' ->
do lift $ modifyPGF pgf (alterConcrete lang (createLin f prods))
do lift $ modifyPGF pgf (alterConcrete lang (createLin f prods seqtbl >> return ()))
return ()
| otherwise -> fail "The linearization categories in the resource and the compiled grammar does not match"
Bad msg -> fail msg
@@ -272,8 +273,10 @@ transactionCommand (CreateLin opts f t) pgf = do
t <- renameSourceTerm sgr mo (Typed t ty)
(t,ty) <- inferLType sgr [] t
let (ctxt,res_ty) = typeFormCnc ty
prods <- pmcfgForm sgr t ctxt res_ty
return (prods,type2fields sgr res_ty)
(prods,seqs) <- pmcfgForm sgr t ctxt res_ty Map.empty
return (prods,mapToSequence seqs,type2fields sgr res_ty)
where
mapToSequence m = Seq.fromList (map (Left . fst) (sortOn snd (Map.toList m)))
transactionCommand (CreateLincat opts c t) pgf = do
sgr <- getGrammar
@@ -281,7 +284,7 @@ transactionCommand (CreateLincat opts c t) pgf = do
mo <- maybe (fail "no source grammar in scope") return $
greatestResource sgr
case runCheck (compileLincatTerm sgr mo t) of
Ok (fields,_)-> do lift $ modifyPGF pgf (alterConcrete lang (createLincat c fields [] []))
Ok (fields,_)-> do lift $ modifyPGF pgf (alterConcrete lang (createLincat c fields [] [] Seq.empty >> return ()))
return ()
Bad msg -> fail msg
where