Add first version of LPGF datatype, with linearization function and some hardcoded examples

This commit is contained in:
John J. Camilleri
2021-01-22 14:07:41 +01:00
parent 8ad9cf1e09
commit 93b81b9f13
5 changed files with 517 additions and 305 deletions

View File

@@ -107,6 +107,7 @@ Library
PGF PGF
PGF.Internal PGF.Internal
PGF.Haskell PGF.Haskell
LPGF
other-modules: other-modules:
PGF.Data PGF.Data

View File

@@ -25,6 +25,7 @@ import GF.Text.Pretty(render,($$),(<+>),nest)
import PGF.Internal(optimizePGF) import PGF.Internal(optimizePGF)
import PGF(PGF,defaultProbabilities,setProbabilities,readProbabilitiesFromFile) import PGF(PGF,defaultProbabilities,setProbabilities,readProbabilitiesFromFile)
import LPGF(LPGF)
-- | Compiles a number of source files and builds a 'PGF' structure for them. -- | Compiles a number of source files and builds a 'PGF' structure for them.
-- This is a composition of 'link' and 'batchCompile'. -- This is a composition of 'link' and 'batchCompile'.
@@ -43,7 +44,8 @@ link opts (cnc,gr) =
return $ setProbabilities probs return $ setProbabilities probs
$ if flag optOptimizePGF opts then optimizePGF pgf else pgf $ if flag optOptimizePGF opts then optimizePGF pgf else pgf
linkl :: Options -> (ModuleName,Grammar) -> IOE PGF -- | Link a grammar into a 'LPGF' that can be used for linearization only.
linkl :: Options -> (ModuleName,Grammar) -> IOE LPGF
linkl opts (cnc,gr) = linkl opts (cnc,gr) =
putPointE Normal opts "linking ... " $ do putPointE Normal opts "linking ... " $ do
let abs = srcAbsName gr cnc let abs = srcAbsName gr cnc

View File

@@ -1,308 +1,312 @@
{-# LANGUAGE BangPatterns, FlexibleContexts #-} -- {-# LANGUAGE BangPatterns, FlexibleContexts #-}
module GF.Compile.GrammarToLPGF (mkCanon2lpgf) where module GF.Compile.GrammarToLPGF (mkCanon2lpgf) where
import LPGF
--import GF.Compile.Export --import GF.Compile.Export
import GF.Compile.GeneratePMCFG -- import GF.Compile.GeneratePMCFG
import GF.Compile.GenerateBC -- import GF.Compile.GenerateBC
--
import PGF(CId,mkCId,utf8CId) -- import PGF(CId,mkCId,utf8CId)
import PGF.Internal(fidInt,fidFloat,fidString,fidVar) -- import PGF.Internal(fidInt,fidFloat,fidString,fidVar)
import PGF.Internal(updateProductionIndices) -- import PGF.Internal(updateProductionIndices)
import qualified PGF.Internal as C -- import qualified PGF.Internal as C
import qualified PGF.Internal as D -- import qualified PGF.Internal as D
import GF.Grammar.Predef -- import GF.Grammar.Predef
import GF.Grammar.Grammar import GF.Grammar.Grammar
import qualified GF.Grammar.Lookup as Look -- import qualified GF.Grammar.Lookup as Look
import qualified GF.Grammar as A -- import qualified GF.Grammar as A
import qualified GF.Grammar.Macros as GM -- import qualified GF.Grammar.Macros as GM
--
import GF.Infra.Ident -- import GF.Infra.Ident
import GF.Infra.Option import GF.Infra.Option
import GF.Infra.UseIO (IOE) import GF.Infra.UseIO (IOE)
import GF.Data.Operations -- import GF.Data.Operations
--
-- import Data.List
-- import qualified Data.Set as Set
-- import qualified Data.Map as Map
-- import qualified Data.IntMap as IntMap
-- import Data.Array.IArray
import Data.List mkCanon2lpgf :: Options -> SourceGrammar -> ModuleName -> IOE LPGF
import qualified Data.Set as Set mkCanon2lpgf opts gr am =
import qualified Data.Map as Map return zero
import qualified Data.IntMap as IntMap
import Data.Array.IArray
-- (an,abs) <- mkAbstr am
mkCanon2lpgf :: Options -> SourceGrammar -> ModuleName -> IOE D.PGF -- cncs <- mapM mkConcr (allConcretes gr am)
mkCanon2lpgf opts gr am = do -- -- return $ updateProductionIndices (D.PGF Map.empty an abs (Map.fromList cncs))
(an,abs) <- mkAbstr am -- return $ D.PGF Map.empty an abs (Map.fromList cncs)
cncs <- mapM mkConcr (allConcretes gr am) -- where
return $ updateProductionIndices (D.PGF Map.empty an abs (Map.fromList cncs)) -- cenv = resourceValues opts gr
where --
cenv = resourceValues opts gr -- mkAbstr am = return (mi2i am, D.Abstr flags funs cats)
-- where
mkAbstr am = return (mi2i am, D.Abstr flags funs cats) -- aflags = err (const noOptions) mflags (lookupModule gr am)
where --
aflags = err (const noOptions) mflags (lookupModule gr am) -- adefs =
-- [((cPredefAbs,c), AbsCat (Just (L NoLoc []))) | c <- [cFloat,cInt,cString]] ++
adefs = -- Look.allOrigInfos gr am
[((cPredefAbs,c), AbsCat (Just (L NoLoc []))) | c <- [cFloat,cInt,cString]] ++ --
Look.allOrigInfos gr am -- flags = Map.fromList [(mkCId f,x) | (f,x) <- optionsPGF aflags]
--
flags = Map.fromList [(mkCId f,x) | (f,x) <- optionsPGF aflags] -- funs = Map.fromList [(i2i f, (mkType [] ty, arity, mkDef gr arity mdef, 0)) |
-- ((m,f),AbsFun (Just (L _ ty)) ma mdef _) <- adefs,
funs = Map.fromList [(i2i f, (mkType [] ty, arity, mkDef gr arity mdef, 0)) | -- let arity = mkArity ma mdef ty]
((m,f),AbsFun (Just (L _ ty)) ma mdef _) <- adefs, --
let arity = mkArity ma mdef ty] -- cats = Map.fromList [(i2i c, (snd (mkContext [] cont),catfuns c, 0)) |
-- ((m,c),AbsCat (Just (L _ cont))) <- adefs]
cats = Map.fromList [(i2i c, (snd (mkContext [] cont),catfuns c, 0)) | --
((m,c),AbsCat (Just (L _ cont))) <- adefs] -- catfuns cat =
-- [(0,i2i f) | ((m,f),AbsFun (Just (L _ ty)) _ _ (Just True)) <- adefs, snd (GM.valCat ty) == cat]
catfuns cat = --
[(0,i2i f) | ((m,f),AbsFun (Just (L _ ty)) _ _ (Just True)) <- adefs, snd (GM.valCat ty) == cat] -- mkConcr cm = do
-- let cflags = err (const noOptions) mflags (lookupModule gr cm)
mkConcr cm = do -- ciCmp | flag optCaseSensitive cflags = compare
let cflags = err (const noOptions) mflags (lookupModule gr cm) -- | otherwise = C.compareCaseInsensitve
ciCmp | flag optCaseSensitive cflags = compare --
| otherwise = C.compareCaseInsensitve -- (ex_seqs,cdefs) <- addMissingPMCFGs
-- Map.empty
(ex_seqs,cdefs) <- addMissingPMCFGs -- ([((cPredefAbs,c), CncCat (Just (L NoLoc GM.defLinType)) Nothing Nothing Nothing Nothing) | c <- [cInt,cFloat,cString]] ++
Map.empty -- Look.allOrigInfos gr cm)
([((cPredefAbs,c), CncCat (Just (L NoLoc GM.defLinType)) Nothing Nothing Nothing Nothing) | c <- [cInt,cFloat,cString]] ++ --
Look.allOrigInfos gr cm) -- let flags = Map.fromList [(mkCId f,x) | (f,x) <- optionsPGF cflags]
--
let flags = Map.fromList [(mkCId f,x) | (f,x) <- optionsPGF cflags] -- seqs = (mkArray . C.sortNubBy ciCmp . concat) $
-- (Map.keys ex_seqs : [maybe [] elems (mseqs mi) | (m,mi) <- allExtends gr cm])
seqs = (mkArray . C.sortNubBy ciCmp . concat) $ --
(Map.keys ex_seqs : [maybe [] elems (mseqs mi) | (m,mi) <- allExtends gr cm]) -- ex_seqs_arr = mkMapArray ex_seqs :: Array SeqId Sequence
--
ex_seqs_arr = mkMapArray ex_seqs :: Array SeqId Sequence -- !(!fid_cnt1,!cnccats) = genCncCats gr am cm cdefs
-- !(!fid_cnt2,!productions,!lindefs,!linrefs,!cncfuns)
!(!fid_cnt1,!cnccats) = genCncCats gr am cm cdefs -- = genCncFuns gr am cm ex_seqs_arr ciCmp seqs cdefs fid_cnt1 cnccats
!(!fid_cnt2,!productions,!lindefs,!linrefs,!cncfuns) --
= genCncFuns gr am cm ex_seqs_arr ciCmp seqs cdefs fid_cnt1 cnccats -- printnames = genPrintNames cdefs
-- return (mi2i cm, D.Concr flags
printnames = genPrintNames cdefs -- printnames
return (mi2i cm, D.Concr flags -- cncfuns
printnames -- lindefs
cncfuns -- linrefs
lindefs -- seqs
linrefs -- productions
seqs -- IntMap.empty
productions -- Map.empty
IntMap.empty -- cnccats
Map.empty -- IntMap.empty
cnccats -- fid_cnt2)
IntMap.empty -- where
fid_cnt2) -- -- if some module was compiled with -no-pmcfg, then
where -- -- we have to create the PMCFG code just before linking
-- if some module was compiled with -no-pmcfg, then -- addMissingPMCFGs seqs [] = return (seqs,[])
-- we have to create the PMCFG code just before linking -- addMissingPMCFGs seqs (((m,id), info):is) = do
addMissingPMCFGs seqs [] = return (seqs,[]) -- (seqs,info) <- addPMCFG opts gr cenv Nothing am cm seqs id info
addMissingPMCFGs seqs (((m,id), info):is) = do -- (seqs,is ) <- addMissingPMCFGs seqs is
(seqs,info) <- addPMCFG opts gr cenv Nothing am cm seqs id info -- return (seqs, ((m,id), info) : is)
(seqs,is ) <- addMissingPMCFGs seqs is --
return (seqs, ((m,id), info) : is) -- i2i :: Ident -> CId
-- i2i = utf8CId . ident2utf8
i2i :: Ident -> CId --
i2i = utf8CId . ident2utf8 -- mi2i :: ModuleName -> CId
-- mi2i (MN i) = i2i i
mi2i :: ModuleName -> CId --
mi2i (MN i) = i2i i -- mkType :: [Ident] -> A.Type -> C.Type
-- mkType scope t =
mkType :: [Ident] -> A.Type -> C.Type -- case GM.typeForm t of
mkType scope t = -- (hyps,(_,cat),args) -> let (scope',hyps') = mkContext scope hyps
case GM.typeForm t of -- in C.DTyp hyps' (i2i cat) (map (mkExp scope') args)
(hyps,(_,cat),args) -> let (scope',hyps') = mkContext scope hyps --
in C.DTyp hyps' (i2i cat) (map (mkExp scope') args) -- mkExp :: [Ident] -> A.Term -> C.Expr
-- mkExp scope t =
mkExp :: [Ident] -> A.Term -> C.Expr -- case t of
mkExp scope t = -- Q (_,c) -> C.EFun (i2i c)
case t of -- QC (_,c) -> C.EFun (i2i c)
Q (_,c) -> C.EFun (i2i c) -- Vr x -> case lookup x (zip scope [0..]) of
QC (_,c) -> C.EFun (i2i c) -- Just i -> C.EVar i
Vr x -> case lookup x (zip scope [0..]) of -- Nothing -> C.EMeta 0
Just i -> C.EVar i -- Abs b x t-> C.EAbs b (i2i x) (mkExp (x:scope) t)
Nothing -> C.EMeta 0 -- App t1 t2-> C.EApp (mkExp scope t1) (mkExp scope t2)
Abs b x t-> C.EAbs b (i2i x) (mkExp (x:scope) t) -- EInt i -> C.ELit (C.LInt (fromIntegral i))
App t1 t2-> C.EApp (mkExp scope t1) (mkExp scope t2) -- EFloat f -> C.ELit (C.LFlt f)
EInt i -> C.ELit (C.LInt (fromIntegral i)) -- K s -> C.ELit (C.LStr s)
EFloat f -> C.ELit (C.LFlt f) -- Meta i -> C.EMeta i
K s -> C.ELit (C.LStr s) -- _ -> C.EMeta 0
Meta i -> C.EMeta i --
_ -> C.EMeta 0 -- mkPatt scope p =
-- case p of
mkPatt scope p = -- A.PP (_,c) ps->let (scope',ps') = mapAccumL mkPatt scope ps
case p of -- in (scope',C.PApp (i2i c) ps')
A.PP (_,c) ps->let (scope',ps') = mapAccumL mkPatt scope ps -- A.PV x -> (x:scope,C.PVar (i2i x))
in (scope',C.PApp (i2i c) ps') -- A.PAs x p -> let (scope',p') = mkPatt scope p
A.PV x -> (x:scope,C.PVar (i2i x)) -- in (x:scope',C.PAs (i2i x) p')
A.PAs x p -> let (scope',p') = mkPatt scope p -- A.PW -> ( scope,C.PWild)
in (x:scope',C.PAs (i2i x) p') -- A.PInt i -> ( scope,C.PLit (C.LInt (fromIntegral i)))
A.PW -> ( scope,C.PWild) -- A.PFloat f -> ( scope,C.PLit (C.LFlt f))
A.PInt i -> ( scope,C.PLit (C.LInt (fromIntegral i))) -- A.PString s -> ( scope,C.PLit (C.LStr s))
A.PFloat f -> ( scope,C.PLit (C.LFlt f)) -- A.PImplArg p-> let (scope',p') = mkPatt scope p
A.PString s -> ( scope,C.PLit (C.LStr s)) -- in (scope',C.PImplArg p')
A.PImplArg p-> let (scope',p') = mkPatt scope p -- A.PTilde t -> ( scope,C.PTilde (mkExp scope t))
in (scope',C.PImplArg p') --
A.PTilde t -> ( scope,C.PTilde (mkExp scope t)) -- mkContext :: [Ident] -> A.Context -> ([Ident],[C.Hypo])
-- mkContext scope hyps = mapAccumL (\scope (bt,x,ty) -> let ty' = mkType scope ty
mkContext :: [Ident] -> A.Context -> ([Ident],[C.Hypo]) -- in if x == identW
mkContext scope hyps = mapAccumL (\scope (bt,x,ty) -> let ty' = mkType scope ty -- then ( scope,(bt,i2i x,ty'))
in if x == identW -- else (x:scope,(bt,i2i x,ty'))) scope hyps
then ( scope,(bt,i2i x,ty')) --
else (x:scope,(bt,i2i x,ty'))) scope hyps -- mkDef gr arity (Just eqs) = Just ([C.Equ ps' (mkExp scope' e) | L _ (ps,e) <- eqs, let (scope',ps') = mapAccumL mkPatt [] ps]
-- ,generateByteCode gr arity eqs
mkDef gr arity (Just eqs) = Just ([C.Equ ps' (mkExp scope' e) | L _ (ps,e) <- eqs, let (scope',ps') = mapAccumL mkPatt [] ps] -- )
,generateByteCode gr arity eqs -- mkDef gr arity Nothing = Nothing
) --
mkDef gr arity Nothing = Nothing -- mkArity (Just a) _ ty = a -- known arity, i.e. defined function
-- mkArity Nothing (Just _) ty = 0 -- defined function with no arity - must be an axiom
mkArity (Just a) _ ty = a -- known arity, i.e. defined function -- mkArity Nothing _ ty = let (ctxt, _, _) = GM.typeForm ty -- constructor
mkArity Nothing (Just _) ty = 0 -- defined function with no arity - must be an axiom -- in length ctxt
mkArity Nothing _ ty = let (ctxt, _, _) = GM.typeForm ty -- constructor --
in length ctxt -- genCncCats gr am cm cdefs =
-- let (index,cats) = mkCncCats 0 cdefs
genCncCats gr am cm cdefs = -- in (index, Map.fromList cats)
let (index,cats) = mkCncCats 0 cdefs -- where
in (index, Map.fromList cats) -- mkCncCats index [] = (index,[])
where -- mkCncCats index (((m,id),CncCat (Just (L _ lincat)) _ _ _ _):cdefs)
mkCncCats index [] = (index,[]) -- | id == cInt =
mkCncCats index (((m,id),CncCat (Just (L _ lincat)) _ _ _ _):cdefs) -- let cc = pgfCncCat gr lincat fidInt
| id == cInt = -- (index',cats) = mkCncCats index cdefs
let cc = pgfCncCat gr lincat fidInt -- in (index', (i2i id,cc) : cats)
(index',cats) = mkCncCats index cdefs -- | id == cFloat =
in (index', (i2i id,cc) : cats) -- let cc = pgfCncCat gr lincat fidFloat
| id == cFloat = -- (index',cats) = mkCncCats index cdefs
let cc = pgfCncCat gr lincat fidFloat -- in (index', (i2i id,cc) : cats)
(index',cats) = mkCncCats index cdefs -- | id == cString =
in (index', (i2i id,cc) : cats) -- let cc = pgfCncCat gr lincat fidString
| id == cString = -- (index',cats) = mkCncCats index cdefs
let cc = pgfCncCat gr lincat fidString -- in (index', (i2i id,cc) : cats)
(index',cats) = mkCncCats index cdefs -- | otherwise =
in (index', (i2i id,cc) : cats) -- let cc@(C.CncCat _s e _) = pgfCncCat gr lincat index
| otherwise = -- (index',cats) = mkCncCats (e+1) cdefs
let cc@(C.CncCat _s e _) = pgfCncCat gr lincat index -- in (index', (i2i id,cc) : cats)
(index',cats) = mkCncCats (e+1) cdefs -- mkCncCats index (_ :cdefs) = mkCncCats index cdefs
in (index', (i2i id,cc) : cats) --
mkCncCats index (_ :cdefs) = mkCncCats index cdefs -- genCncFuns :: Grammar
-- -> ModuleName
genCncFuns :: Grammar -- -> ModuleName
-> ModuleName -- -> Array SeqId Sequence
-> ModuleName -- -> (Sequence -> Sequence -> Ordering)
-> Array SeqId Sequence -- -> Array SeqId Sequence
-> (Sequence -> Sequence -> Ordering) -- -> [(QIdent, Info)]
-> Array SeqId Sequence -- -> FId
-> [(QIdent, Info)] -- -> Map.Map CId D.CncCat
-> FId -- -> (FId,
-> Map.Map CId D.CncCat -- IntMap.IntMap (Set.Set D.Production),
-> (FId, -- IntMap.IntMap [FunId],
IntMap.IntMap (Set.Set D.Production), -- IntMap.IntMap [FunId],
IntMap.IntMap [FunId], -- Array FunId D.CncFun)
IntMap.IntMap [FunId], -- genCncFuns gr am cm ex_seqs ciCmp seqs cdefs fid_cnt cnccats =
Array FunId D.CncFun) -- let (fid_cnt1,funs_cnt1,funs1,lindefs,linrefs) = mkCncCats cdefs fid_cnt 0 [] IntMap.empty IntMap.empty
genCncFuns gr am cm ex_seqs ciCmp seqs cdefs fid_cnt cnccats = -- (fid_cnt2,funs_cnt2,funs2,prods) = mkCncFuns cdefs fid_cnt1 funs_cnt1 funs1 lindefs Map.empty IntMap.empty
let (fid_cnt1,funs_cnt1,funs1,lindefs,linrefs) = mkCncCats cdefs fid_cnt 0 [] IntMap.empty IntMap.empty -- in (fid_cnt2,prods,lindefs,linrefs,array (0,funs_cnt2-1) funs2)
(fid_cnt2,funs_cnt2,funs2,prods) = mkCncFuns cdefs fid_cnt1 funs_cnt1 funs1 lindefs Map.empty IntMap.empty -- where
in (fid_cnt2,prods,lindefs,linrefs,array (0,funs_cnt2-1) funs2) -- mkCncCats [] fid_cnt funs_cnt funs lindefs linrefs =
where -- (fid_cnt,funs_cnt,funs,lindefs,linrefs)
mkCncCats [] fid_cnt funs_cnt funs lindefs linrefs = -- mkCncCats (((m,id),CncCat _ _ _ _ (Just (PMCFG prods0 funs0))):cdefs) fid_cnt funs_cnt funs lindefs linrefs =
(fid_cnt,funs_cnt,funs,lindefs,linrefs) -- let !funs_cnt' = let (s_funid, e_funid) = bounds funs0
mkCncCats (((m,id),CncCat _ _ _ _ (Just (PMCFG prods0 funs0))):cdefs) fid_cnt funs_cnt funs lindefs linrefs = -- in funs_cnt+(e_funid-s_funid+1)
let !funs_cnt' = let (s_funid, e_funid) = bounds funs0 -- lindefs' = foldl' (toLinDef (am,id) funs_cnt) lindefs prods0
in funs_cnt+(e_funid-s_funid+1) -- linrefs' = foldl' (toLinRef (am,id) funs_cnt) linrefs prods0
lindefs' = foldl' (toLinDef (am,id) funs_cnt) lindefs prods0 -- funs' = foldl' (toCncFun funs_cnt (m,mkLinDefId id)) funs (assocs funs0)
linrefs' = foldl' (toLinRef (am,id) funs_cnt) linrefs prods0 -- in mkCncCats cdefs fid_cnt funs_cnt' funs' lindefs' linrefs'
funs' = foldl' (toCncFun funs_cnt (m,mkLinDefId id)) funs (assocs funs0) -- mkCncCats (_ :cdefs) fid_cnt funs_cnt funs lindefs linrefs =
in mkCncCats cdefs fid_cnt funs_cnt' funs' lindefs' linrefs' -- mkCncCats cdefs fid_cnt funs_cnt funs lindefs linrefs
mkCncCats (_ :cdefs) fid_cnt funs_cnt funs lindefs linrefs = --
mkCncCats cdefs fid_cnt funs_cnt funs lindefs linrefs -- mkCncFuns [] fid_cnt funs_cnt funs lindefs crc prods =
-- (fid_cnt,funs_cnt,funs,prods)
mkCncFuns [] fid_cnt funs_cnt funs lindefs crc prods = -- mkCncFuns (((m,id),CncFun _ _ _ (Just (PMCFG prods0 funs0))):cdefs) fid_cnt funs_cnt funs lindefs crc prods =
(fid_cnt,funs_cnt,funs,prods) -- let ---Ok ty_C = fmap GM.typeForm (Look.lookupFunType gr am id)
mkCncFuns (((m,id),CncFun _ _ _ (Just (PMCFG prods0 funs0))):cdefs) fid_cnt funs_cnt funs lindefs crc prods = -- ty_C = err error (\x -> x) $ fmap GM.typeForm (Look.lookupFunType gr am id)
let ---Ok ty_C = fmap GM.typeForm (Look.lookupFunType gr am id) -- !funs_cnt' = let (s_funid, e_funid) = bounds funs0
ty_C = err error (\x -> x) $ fmap GM.typeForm (Look.lookupFunType gr am id) -- in funs_cnt+(e_funid-s_funid+1)
!funs_cnt' = let (s_funid, e_funid) = bounds funs0 -- !(fid_cnt',crc',prods')
in funs_cnt+(e_funid-s_funid+1) -- = foldl' (toProd lindefs ty_C funs_cnt)
!(fid_cnt',crc',prods') -- (fid_cnt,crc,prods) prods0
= foldl' (toProd lindefs ty_C funs_cnt) -- funs' = foldl' (toCncFun funs_cnt (m,id)) funs (assocs funs0)
(fid_cnt,crc,prods) prods0 -- in mkCncFuns cdefs fid_cnt' funs_cnt' funs' lindefs crc' prods'
funs' = foldl' (toCncFun funs_cnt (m,id)) funs (assocs funs0) -- mkCncFuns (_ :cdefs) fid_cnt funs_cnt funs lindefs crc prods =
in mkCncFuns cdefs fid_cnt' funs_cnt' funs' lindefs crc' prods' -- mkCncFuns cdefs fid_cnt funs_cnt funs lindefs crc prods
mkCncFuns (_ :cdefs) fid_cnt funs_cnt funs lindefs crc prods = --
mkCncFuns cdefs fid_cnt funs_cnt funs lindefs crc prods -- toProd lindefs (ctxt_C,res_C,_) offs st (Production fid0 funid0 args0) =
-- let !((fid_cnt,crc,prods),args) = mapAccumL mkArg st (zip ctxt_C args0)
toProd lindefs (ctxt_C,res_C,_) offs st (Production fid0 funid0 args0) = -- set0 = Set.fromList (map (C.PApply (offs+funid0)) (sequence args))
let !((fid_cnt,crc,prods),args) = mapAccumL mkArg st (zip ctxt_C args0) -- fid = mkFId res_C fid0
set0 = Set.fromList (map (C.PApply (offs+funid0)) (sequence args)) -- !prods' = case IntMap.lookup fid prods of
fid = mkFId res_C fid0 -- Just set -> IntMap.insert fid (Set.union set0 set) prods
!prods' = case IntMap.lookup fid prods of -- Nothing -> IntMap.insert fid set0 prods
Just set -> IntMap.insert fid (Set.union set0 set) prods -- in (fid_cnt,crc,prods')
Nothing -> IntMap.insert fid set0 prods -- where
in (fid_cnt,crc,prods') -- mkArg st@(fid_cnt,crc,prods) ((_,_,ty),fid0s ) =
where -- case fid0s of
mkArg st@(fid_cnt,crc,prods) ((_,_,ty),fid0s ) = -- [fid0] -> (st,map (flip C.PArg (mkFId arg_C fid0)) ctxt)
case fid0s of -- fid0s -> case Map.lookup fids crc of
[fid0] -> (st,map (flip C.PArg (mkFId arg_C fid0)) ctxt) -- Just fid -> (st,map (flip C.PArg fid) ctxt)
fid0s -> case Map.lookup fids crc of -- Nothing -> let !crc' = Map.insert fids fid_cnt crc
Just fid -> (st,map (flip C.PArg fid) ctxt) -- !prods' = IntMap.insert fid_cnt (Set.fromList (map C.PCoerce fids)) prods
Nothing -> let !crc' = Map.insert fids fid_cnt crc -- in ((fid_cnt+1,crc',prods'),map (flip C.PArg fid_cnt) ctxt)
!prods' = IntMap.insert fid_cnt (Set.fromList (map C.PCoerce fids)) prods -- where
in ((fid_cnt+1,crc',prods'),map (flip C.PArg fid_cnt) ctxt) -- (hargs_C,arg_C) = GM.catSkeleton ty
where -- ctxt = mapM (mkCtxt lindefs) hargs_C
(hargs_C,arg_C) = GM.catSkeleton ty -- fids = map (mkFId arg_C) fid0s
ctxt = mapM (mkCtxt lindefs) hargs_C --
fids = map (mkFId arg_C) fid0s -- mkLinDefId id = prefixIdent "lindef " id
--
mkLinDefId id = prefixIdent "lindef " id -- toLinDef res offs lindefs (Production fid0 funid0 args) =
-- if args == [[fidVar]]
toLinDef res offs lindefs (Production fid0 funid0 args) = -- then IntMap.insertWith (++) fid [offs+funid0] lindefs
if args == [[fidVar]] -- else lindefs
then IntMap.insertWith (++) fid [offs+funid0] lindefs -- where
else lindefs -- fid = mkFId res fid0
where --
fid = mkFId res fid0 -- toLinRef res offs linrefs (Production fid0 funid0 [fargs]) =
-- if fid0 == fidVar
toLinRef res offs linrefs (Production fid0 funid0 [fargs]) = -- then foldr (\fid -> IntMap.insertWith (++) fid [offs+funid0]) linrefs fids
if fid0 == fidVar -- else linrefs
then foldr (\fid -> IntMap.insertWith (++) fid [offs+funid0]) linrefs fids -- where
else linrefs -- fids = map (mkFId res) fargs
where --
fids = map (mkFId res) fargs -- mkFId (_,cat) fid0 =
-- case Map.lookup (i2i cat) cnccats of
mkFId (_,cat) fid0 = -- Just (C.CncCat s e _) -> s+fid0
case Map.lookup (i2i cat) cnccats of -- Nothing -> error ("GrammarToPGF.mkFId: missing category "++showIdent cat)
Just (C.CncCat s e _) -> s+fid0 --
Nothing -> error ("GrammarToPGF.mkFId: missing category "++showIdent cat) -- mkCtxt lindefs (_,cat) =
-- case Map.lookup (i2i cat) cnccats of
mkCtxt lindefs (_,cat) = -- Just (C.CncCat s e _) -> [(C.fidVar,fid) | fid <- [s..e], Just _ <- [IntMap.lookup fid lindefs]]
case Map.lookup (i2i cat) cnccats of -- Nothing -> error "GrammarToPGF.mkCtxt failed"
Just (C.CncCat s e _) -> [(C.fidVar,fid) | fid <- [s..e], Just _ <- [IntMap.lookup fid lindefs]] --
Nothing -> error "GrammarToPGF.mkCtxt failed" -- toCncFun offs (m,id) funs (funid0,lins0) =
-- let mseqs = case lookupModule gr m of
toCncFun offs (m,id) funs (funid0,lins0) = -- Ok (ModInfo{mseqs=Just mseqs}) -> mseqs
let mseqs = case lookupModule gr m of -- _ -> ex_seqs
Ok (ModInfo{mseqs=Just mseqs}) -> mseqs -- in (offs+funid0,C.CncFun (i2i id) (amap (newIndex mseqs) lins0)):funs
_ -> ex_seqs -- where
in (offs+funid0,C.CncFun (i2i id) (amap (newIndex mseqs) lins0)):funs -- newIndex mseqs i = binSearch (mseqs ! i) seqs (bounds seqs)
where --
newIndex mseqs i = binSearch (mseqs ! i) seqs (bounds seqs) -- binSearch v arr (i,j)
-- | i <= j = case ciCmp v (arr ! k) of
binSearch v arr (i,j) -- LT -> binSearch v arr (i,k-1)
| i <= j = case ciCmp v (arr ! k) of -- EQ -> k
LT -> binSearch v arr (i,k-1) -- GT -> binSearch v arr (k+1,j)
EQ -> k -- | otherwise = error "binSearch"
GT -> binSearch v arr (k+1,j) -- where
| otherwise = error "binSearch" -- k = (i+j) `div` 2
where --
k = (i+j) `div` 2 -- genPrintNames cdefs =
-- Map.fromAscList [(i2i id, name) | ((m,id),info) <- cdefs, name <- prn info]
genPrintNames cdefs = -- where
Map.fromAscList [(i2i id, name) | ((m,id),info) <- cdefs, name <- prn info] -- prn (CncFun _ _ (Just (L _ tr)) _) = [flatten tr]
where -- prn (CncCat _ _ _ (Just (L _ tr)) _) = [flatten tr]
prn (CncFun _ _ (Just (L _ tr)) _) = [flatten tr] -- prn _ = []
prn (CncCat _ _ _ (Just (L _ tr)) _) = [flatten tr] --
prn _ = [] -- flatten (K s) = s
-- flatten (Alts x _) = flatten x
flatten (K s) = s -- flatten (C x y) = flatten x +++ flatten y
flatten (Alts x _) = flatten x --
flatten (C x y) = flatten x +++ flatten y -- mkArray lst = listArray (0,length lst-1) lst
-- mkMapArray map = array (0,Map.size map-1) [(v,k) | (k,v) <- Map.toList map]
mkArray lst = listArray (0,length lst-1) lst
mkMapArray map = array (0,Map.size map-1) [(v,k) | (k,v) <- Map.toList map]

View File

@@ -3,6 +3,8 @@ module GF.Compiler (mainGFC, linkGrammars, writePGF, writeOutputs) where
import PGF import PGF
import PGF.Internal(concretes,optimizePGF,unionPGF) import PGF.Internal(concretes,optimizePGF,unionPGF)
import PGF.Internal(putSplitAbs,encodeFile,runPut) import PGF.Internal(putSplitAbs,encodeFile,runPut)
import LPGF(LPGF)
import qualified LPGF
import GF.Compile as S(batchCompile,link,linkl,srcAbsName) import GF.Compile as S(batchCompile,link,linkl,srcAbsName)
import GF.CompileInParallel as P(parallelBatchCompile) import GF.CompileInParallel as P(parallelBatchCompile)
import GF.Compile.Export import GF.Compile.Export
@@ -97,10 +99,8 @@ compileSourceFiles opts fs =
-- recreated. Calls 'writePGF' and 'writeOutputs'. -- recreated. Calls 'writePGF' and 'writeOutputs'.
linkGrammars :: Options -> (UTCTime,[(ModuleName, Grammar)]) -> IOE () linkGrammars :: Options -> (UTCTime,[(ModuleName, Grammar)]) -> IOE ()
linkGrammars opts (_,cnc_grs) | FmtLPGF `elem` flag optOutputFormats opts = do linkGrammars opts (_,cnc_grs) | FmtLPGF `elem` flag optOutputFormats opts = do
pgfs <- mapM (linkl opts) cnc_grs lpgf <- linkl opts (head cnc_grs)
let pgf0 = foldl1 unionPGF pgfs writeLPGF opts lpgf
writePGF opts pgf0
putStrLn "LPGF"
linkGrammars opts (t_src,~cnc_grs@(~(cnc,gr):_)) = linkGrammars opts (t_src,~cnc_grs@(~(cnc,gr):_)) =
do let abs = render (srcAbsName gr cnc) do let abs = render (srcAbsName gr cnc)
pgfFile = outputPath opts (grammarName' opts abs<.>"pgf") pgfFile = outputPath opts (grammarName' opts abs<.>"pgf")
@@ -186,6 +186,12 @@ writePGF opts pgf =
let outfile = outputPath opts (showCId (fst cnc) <.> "pgf_c") let outfile = outputPath opts (showCId (fst cnc) <.> "pgf_c")
writing opts outfile $ encodeFile outfile cnc writing opts outfile $ encodeFile outfile cnc
writeLPGF :: Options -> LPGF -> IOE ()
writeLPGF opts lpgf = do
let
grammarName = fromMaybe (showCId (LPGF.abstractName lpgf)) (flag optName opts)
outfile = outputPath opts (grammarName <.> "lpgf")
writing opts outfile $ liftIO $ LPGF.encodeFile outfile lpgf
writeOutput :: Options -> FilePath-> String -> IOE () writeOutput :: Options -> FilePath-> String -> IOE ()
writeOutput opts file str = writing opts path $ writeUTF8File path str writeOutput opts file str = writing opts path $ writeUTF8File path str

199
src/runtime/haskell/LPGF.hs Normal file
View File

@@ -0,0 +1,199 @@
-- | Linearisation-only PGF format
-- Closely follows description in Section 2 of Angelov, Bringert, Ranta (2009)
-- "PGF: A Portable Run-Time Format for Type-Theoretical Grammars"
module LPGF (
LPGF,
abstractName,
linearize, linearizeConcr,
readLPGF,
-- internal/testing
encodeFile,
zero
) where
import PGF (Language, readLanguage, showLanguage)
import PGF.CId
import PGF.Tree
import Control.Monad (forM_)
import qualified Data.Map as Map
import Text.Printf (printf)
-- | Linearisation-only PGF
data LPGF = LPGF {
absname :: CId,
abstract :: Abstr,
concretes :: Map.Map CId Concr
} deriving (Read, Show)
-- | Abstract syntax
data Abstr = Abstr {
cats :: Map.Map CId (),
funs :: Map.Map CId Type
} deriving (Read, Show)
-- | Concrete syntax
data Concr = Concr {
lincats :: Map.Map CId LinType, -- ^ assigning a linearization type to each category
lins :: Map.Map CId LinFun -- ^ assigning a linearization function to each function
} deriving (Read, Show)
-- | Abstract function type
data Type = Type [CId] CId
deriving (Read, Show)
-- | Linearisation type
data LinType =
LTStr
| LTInt Int
| LTProduct [LinType]
deriving (Read, Show)
-- | Linearisation function
data LinFun =
LFEmpty
| LFToken String
| LFConcat LinFun LinFun
| LFInt Int
| LFTuple [LinFun]
| LFProjection LinFun LinFun -- ^ In order for the projection to be well-formed, t1 must be a tuple and t2 an integer within the bounds of the size of the tuple
| LFArgument Int
deriving (Read, Show)
abstractName :: LPGF -> CId
abstractName = absname
encodeFile :: FilePath -> LPGF -> IO ()
encodeFile path lpgf = writeFile path (show lpgf)
readLPGF :: FilePath -> IO LPGF
readLPGF path = read <$> readFile path
-- | Helper for building concat trees
mkConcat :: [LinFun] -> LinFun
mkConcat [] = LFEmpty
mkConcat [x] = x
mkConcat xs = foldl1 LFConcat xs
-- | Main linearize function
linearize :: LPGF -> Language -> Tree -> String
linearize lpgf lang =
case Map.lookup lang (concretes lpgf) of
Just concr -> linearizeConcr concr
Nothing -> error $ printf "Unknown language: %s" (showCId lang)
-- | Language-specific linearize function
-- Section 2.5
linearizeConcr :: Concr -> Tree -> String
linearizeConcr concr tree = lin2string $ lin tree
where
lin :: Tree -> LinFun
lin tree = case tree of
Fun f as -> v
where
Just t = Map.lookup f (lins concr)
ts = map lin as
v = eval ts t
x -> error $ printf "Cannot lin %s" (prTree x)
-- | Evaluation context is a sequence of terms
type Context = [LinFun]
-- | Operational semantics, Table 2
eval :: Context -> LinFun -> LinFun
eval cxt t = case t of
LFEmpty -> LFEmpty
LFToken tok -> LFToken tok
LFConcat s t -> LFConcat v w
where
v = eval cxt s
w = eval cxt t
LFInt i -> LFInt i
LFTuple ts -> LFTuple vs
where vs = map (eval cxt) ts
LFProjection t u -> vs !! (i-1)
where
LFTuple vs = eval cxt t
LFInt i = eval cxt u
LFArgument i -> cxt !! (i-1)
-- | Turn concrete syntax terms into an actual string
lin2string :: LinFun -> String
lin2string l = case l of
LFEmpty -> ""
LFToken tok -> tok
LFConcat l1 l2 -> unwords [lin2string l1, lin2string l2]
x -> printf "[%s]" (show x)
---
main :: IO ()
main =
forM_ [tree1, tree2, tree3] $ \tree -> do
putStrLn (prTree tree)
forM_ (Map.toList (concretes zero)) $ \(lang,concr) ->
printf "%s: %s\n" (showLanguage lang) (linearizeConcr concr tree)
putStrLn ""
-- Pred John Walk
tree1 :: Tree
tree1 = Fun (mkCId "Pred") [Fun (mkCId "John") [], Fun (mkCId "Walk") []]
-- Pred We Walk
tree2 :: Tree
tree2 = Fun (mkCId "Pred") [Fun (mkCId "We") [], Fun (mkCId "Walk") []]
-- And (Pred John Walk) (Pred We Walk)
tree3 :: Tree
tree3 = Fun (mkCId "And") [tree1, tree2]
-- Initial LPGF, Figures 6 & 7
zero :: LPGF
zero = LPGF {
absname = mkCId "Zero",
abstract = Abstr {
cats = Map.fromList [
(mkCId "S", ()),
(mkCId "NP", ()),
(mkCId "VP", ())
],
funs = Map.fromList [
(mkCId "And", Type [mkCId "S", mkCId "S"] (mkCId "S")),
(mkCId "Pred", Type [mkCId "NP", mkCId "VP"] (mkCId "S")),
(mkCId "John", Type [] (mkCId "NP")),
(mkCId "We", Type [] (mkCId "NP")),
(mkCId "Walk", Type [] (mkCId "VP"))
]
},
concretes = Map.fromList [
(mkCId "ZeroEng", Concr {
lincats = Map.fromList [
(mkCId "S", LTStr),
(mkCId "NP", LTProduct [LTStr, LTInt 2]),
(mkCId "VP", LTProduct [LTStr, LTStr])
],
lins = Map.fromList [
(mkCId "And", mkConcat [LFArgument 1, LFToken "and", LFArgument 2]),
(mkCId "Pred", mkConcat [LFProjection (LFArgument 1) (LFInt 1), LFProjection (LFArgument 2) (LFProjection (LFArgument 1) (LFInt 2))]),
(mkCId "John", LFTuple [LFToken "John", LFInt 1]),
(mkCId "We", LFTuple [LFToken "we", LFInt 2]),
(mkCId "Walk", LFTuple [LFToken "walks", LFToken "walk"])
]
}),
(mkCId "ZeroGer", Concr {
lincats = Map.fromList [
(mkCId "S", LTStr),
(mkCId "NP", LTProduct [LTStr, LTInt 2, LTInt 3]),
(mkCId "VP", LTProduct [LTProduct [LTStr, LTStr, LTStr], LTProduct [LTStr, LTStr, LTStr]])
],
lins = Map.fromList [
(mkCId "And", mkConcat [LFArgument 1, LFToken "und", LFArgument 2]),
(mkCId "Pred", mkConcat [LFProjection (LFArgument 1) (LFInt 1), LFProjection (LFProjection (LFArgument 2) (LFProjection (LFArgument 1) (LFInt 2))) (LFProjection (LFArgument 1) (LFInt 3))]),
(mkCId "John", LFTuple [LFToken "John", LFInt 1, LFInt 3]),
(mkCId "We", LFTuple [LFToken "wir", LFInt 2, LFInt 1]),
(mkCId "Walk", LFTuple [LFTuple [LFToken "gehe", LFToken "gehst", LFToken "geht"], LFTuple [LFToken "gehen", LFToken "geht", LFToken "gehen"]])
]
})
]
}