1
0
forked from GitHub/gf-core

Add first version of LPGF datatype, with linearization function and some hardcoded examples

This commit is contained in:
John J. Camilleri
2021-01-22 14:07:41 +01:00
parent 8ad9cf1e09
commit 93b81b9f13
5 changed files with 517 additions and 305 deletions

View File

@@ -107,6 +107,7 @@ Library
PGF
PGF.Internal
PGF.Haskell
LPGF
other-modules:
PGF.Data

View File

@@ -25,6 +25,7 @@ import GF.Text.Pretty(render,($$),(<+>),nest)
import PGF.Internal(optimizePGF)
import PGF(PGF,defaultProbabilities,setProbabilities,readProbabilitiesFromFile)
import LPGF(LPGF)
-- | Compiles a number of source files and builds a 'PGF' structure for them.
-- This is a composition of 'link' and 'batchCompile'.
@@ -43,7 +44,8 @@ link opts (cnc,gr) =
return $ setProbabilities probs
$ if flag optOptimizePGF opts then optimizePGF pgf else pgf
linkl :: Options -> (ModuleName,Grammar) -> IOE PGF
-- | Link a grammar into a 'LPGF' that can be used for linearization only.
linkl :: Options -> (ModuleName,Grammar) -> IOE LPGF
linkl opts (cnc,gr) =
putPointE Normal opts "linking ... " $ do
let abs = srcAbsName gr cnc

View File

@@ -1,308 +1,312 @@
{-# LANGUAGE BangPatterns, FlexibleContexts #-}
-- {-# LANGUAGE BangPatterns, FlexibleContexts #-}
module GF.Compile.GrammarToLPGF (mkCanon2lpgf) where
import LPGF
--import GF.Compile.Export
import GF.Compile.GeneratePMCFG
import GF.Compile.GenerateBC
import PGF(CId,mkCId,utf8CId)
import PGF.Internal(fidInt,fidFloat,fidString,fidVar)
import PGF.Internal(updateProductionIndices)
import qualified PGF.Internal as C
import qualified PGF.Internal as D
import GF.Grammar.Predef
-- import GF.Compile.GeneratePMCFG
-- import GF.Compile.GenerateBC
--
-- import PGF(CId,mkCId,utf8CId)
-- import PGF.Internal(fidInt,fidFloat,fidString,fidVar)
-- import PGF.Internal(updateProductionIndices)
-- import qualified PGF.Internal as C
-- import qualified PGF.Internal as D
-- import GF.Grammar.Predef
import GF.Grammar.Grammar
import qualified GF.Grammar.Lookup as Look
import qualified GF.Grammar as A
import qualified GF.Grammar.Macros as GM
import GF.Infra.Ident
-- import qualified GF.Grammar.Lookup as Look
-- import qualified GF.Grammar as A
-- import qualified GF.Grammar.Macros as GM
--
-- import GF.Infra.Ident
import GF.Infra.Option
import GF.Infra.UseIO (IOE)
import GF.Data.Operations
-- import GF.Data.Operations
--
-- import Data.List
-- import qualified Data.Set as Set
-- import qualified Data.Map as Map
-- import qualified Data.IntMap as IntMap
-- import Data.Array.IArray
import Data.List
import qualified Data.Set as Set
import qualified Data.Map as Map
import qualified Data.IntMap as IntMap
import Data.Array.IArray
mkCanon2lpgf :: Options -> SourceGrammar -> ModuleName -> IOE LPGF
mkCanon2lpgf opts gr am =
return zero
mkCanon2lpgf :: Options -> SourceGrammar -> ModuleName -> IOE D.PGF
mkCanon2lpgf opts gr am = do
(an,abs) <- mkAbstr am
cncs <- mapM mkConcr (allConcretes gr am)
return $ updateProductionIndices (D.PGF Map.empty an abs (Map.fromList cncs))
where
cenv = resourceValues opts gr
mkAbstr am = return (mi2i am, D.Abstr flags funs cats)
where
aflags = err (const noOptions) mflags (lookupModule gr am)
adefs =
[((cPredefAbs,c), AbsCat (Just (L NoLoc []))) | c <- [cFloat,cInt,cString]] ++
Look.allOrigInfos gr am
flags = Map.fromList [(mkCId f,x) | (f,x) <- optionsPGF aflags]
funs = Map.fromList [(i2i f, (mkType [] ty, arity, mkDef gr arity mdef, 0)) |
((m,f),AbsFun (Just (L _ ty)) ma mdef _) <- adefs,
let arity = mkArity ma mdef ty]
cats = Map.fromList [(i2i c, (snd (mkContext [] cont),catfuns c, 0)) |
((m,c),AbsCat (Just (L _ cont))) <- adefs]
catfuns cat =
[(0,i2i f) | ((m,f),AbsFun (Just (L _ ty)) _ _ (Just True)) <- adefs, snd (GM.valCat ty) == cat]
mkConcr cm = do
let cflags = err (const noOptions) mflags (lookupModule gr cm)
ciCmp | flag optCaseSensitive cflags = compare
| otherwise = C.compareCaseInsensitve
(ex_seqs,cdefs) <- addMissingPMCFGs
Map.empty
([((cPredefAbs,c), CncCat (Just (L NoLoc GM.defLinType)) Nothing Nothing Nothing Nothing) | c <- [cInt,cFloat,cString]] ++
Look.allOrigInfos gr cm)
let flags = Map.fromList [(mkCId f,x) | (f,x) <- optionsPGF cflags]
seqs = (mkArray . C.sortNubBy ciCmp . concat) $
(Map.keys ex_seqs : [maybe [] elems (mseqs mi) | (m,mi) <- allExtends gr cm])
ex_seqs_arr = mkMapArray ex_seqs :: Array SeqId Sequence
!(!fid_cnt1,!cnccats) = genCncCats gr am cm cdefs
!(!fid_cnt2,!productions,!lindefs,!linrefs,!cncfuns)
= genCncFuns gr am cm ex_seqs_arr ciCmp seqs cdefs fid_cnt1 cnccats
printnames = genPrintNames cdefs
return (mi2i cm, D.Concr flags
printnames
cncfuns
lindefs
linrefs
seqs
productions
IntMap.empty
Map.empty
cnccats
IntMap.empty
fid_cnt2)
where
-- if some module was compiled with -no-pmcfg, then
-- we have to create the PMCFG code just before linking
addMissingPMCFGs seqs [] = return (seqs,[])
addMissingPMCFGs seqs (((m,id), info):is) = do
(seqs,info) <- addPMCFG opts gr cenv Nothing am cm seqs id info
(seqs,is ) <- addMissingPMCFGs seqs is
return (seqs, ((m,id), info) : is)
i2i :: Ident -> CId
i2i = utf8CId . ident2utf8
mi2i :: ModuleName -> CId
mi2i (MN i) = i2i i
mkType :: [Ident] -> A.Type -> C.Type
mkType scope t =
case GM.typeForm t of
(hyps,(_,cat),args) -> let (scope',hyps') = mkContext scope hyps
in C.DTyp hyps' (i2i cat) (map (mkExp scope') args)
mkExp :: [Ident] -> A.Term -> C.Expr
mkExp scope t =
case t of
Q (_,c) -> C.EFun (i2i c)
QC (_,c) -> C.EFun (i2i c)
Vr x -> case lookup x (zip scope [0..]) of
Just i -> C.EVar i
Nothing -> C.EMeta 0
Abs b x t-> C.EAbs b (i2i x) (mkExp (x:scope) t)
App t1 t2-> C.EApp (mkExp scope t1) (mkExp scope t2)
EInt i -> C.ELit (C.LInt (fromIntegral i))
EFloat f -> C.ELit (C.LFlt f)
K s -> C.ELit (C.LStr s)
Meta i -> C.EMeta i
_ -> C.EMeta 0
mkPatt scope p =
case p of
A.PP (_,c) ps->let (scope',ps') = mapAccumL mkPatt scope ps
in (scope',C.PApp (i2i c) ps')
A.PV x -> (x:scope,C.PVar (i2i x))
A.PAs x p -> let (scope',p') = mkPatt scope p
in (x:scope',C.PAs (i2i x) p')
A.PW -> ( scope,C.PWild)
A.PInt i -> ( scope,C.PLit (C.LInt (fromIntegral i)))
A.PFloat f -> ( scope,C.PLit (C.LFlt f))
A.PString s -> ( scope,C.PLit (C.LStr s))
A.PImplArg p-> let (scope',p') = mkPatt scope p
in (scope',C.PImplArg p')
A.PTilde t -> ( scope,C.PTilde (mkExp scope t))
mkContext :: [Ident] -> A.Context -> ([Ident],[C.Hypo])
mkContext scope hyps = mapAccumL (\scope (bt,x,ty) -> let ty' = mkType scope ty
in if x == identW
then ( scope,(bt,i2i x,ty'))
else (x:scope,(bt,i2i x,ty'))) scope hyps
mkDef gr arity (Just eqs) = Just ([C.Equ ps' (mkExp scope' e) | L _ (ps,e) <- eqs, let (scope',ps') = mapAccumL mkPatt [] ps]
,generateByteCode gr arity eqs
)
mkDef gr arity Nothing = Nothing
mkArity (Just a) _ ty = a -- known arity, i.e. defined function
mkArity Nothing (Just _) ty = 0 -- defined function with no arity - must be an axiom
mkArity Nothing _ ty = let (ctxt, _, _) = GM.typeForm ty -- constructor
in length ctxt
genCncCats gr am cm cdefs =
let (index,cats) = mkCncCats 0 cdefs
in (index, Map.fromList cats)
where
mkCncCats index [] = (index,[])
mkCncCats index (((m,id),CncCat (Just (L _ lincat)) _ _ _ _):cdefs)
| id == cInt =
let cc = pgfCncCat gr lincat fidInt
(index',cats) = mkCncCats index cdefs
in (index', (i2i id,cc) : cats)
| id == cFloat =
let cc = pgfCncCat gr lincat fidFloat
(index',cats) = mkCncCats index cdefs
in (index', (i2i id,cc) : cats)
| id == cString =
let cc = pgfCncCat gr lincat fidString
(index',cats) = mkCncCats index cdefs
in (index', (i2i id,cc) : cats)
| otherwise =
let cc@(C.CncCat _s e _) = pgfCncCat gr lincat index
(index',cats) = mkCncCats (e+1) cdefs
in (index', (i2i id,cc) : cats)
mkCncCats index (_ :cdefs) = mkCncCats index cdefs
genCncFuns :: Grammar
-> ModuleName
-> ModuleName
-> Array SeqId Sequence
-> (Sequence -> Sequence -> Ordering)
-> Array SeqId Sequence
-> [(QIdent, Info)]
-> FId
-> Map.Map CId D.CncCat
-> (FId,
IntMap.IntMap (Set.Set D.Production),
IntMap.IntMap [FunId],
IntMap.IntMap [FunId],
Array FunId D.CncFun)
genCncFuns gr am cm ex_seqs ciCmp seqs cdefs fid_cnt cnccats =
let (fid_cnt1,funs_cnt1,funs1,lindefs,linrefs) = mkCncCats cdefs fid_cnt 0 [] IntMap.empty IntMap.empty
(fid_cnt2,funs_cnt2,funs2,prods) = mkCncFuns cdefs fid_cnt1 funs_cnt1 funs1 lindefs Map.empty IntMap.empty
in (fid_cnt2,prods,lindefs,linrefs,array (0,funs_cnt2-1) funs2)
where
mkCncCats [] fid_cnt funs_cnt funs lindefs linrefs =
(fid_cnt,funs_cnt,funs,lindefs,linrefs)
mkCncCats (((m,id),CncCat _ _ _ _ (Just (PMCFG prods0 funs0))):cdefs) fid_cnt funs_cnt funs lindefs linrefs =
let !funs_cnt' = let (s_funid, e_funid) = bounds funs0
in funs_cnt+(e_funid-s_funid+1)
lindefs' = foldl' (toLinDef (am,id) funs_cnt) lindefs prods0
linrefs' = foldl' (toLinRef (am,id) funs_cnt) linrefs prods0
funs' = foldl' (toCncFun funs_cnt (m,mkLinDefId id)) funs (assocs funs0)
in mkCncCats cdefs fid_cnt funs_cnt' funs' lindefs' linrefs'
mkCncCats (_ :cdefs) fid_cnt funs_cnt funs lindefs linrefs =
mkCncCats cdefs fid_cnt funs_cnt funs lindefs linrefs
mkCncFuns [] fid_cnt funs_cnt funs lindefs crc prods =
(fid_cnt,funs_cnt,funs,prods)
mkCncFuns (((m,id),CncFun _ _ _ (Just (PMCFG prods0 funs0))):cdefs) fid_cnt funs_cnt funs lindefs crc prods =
let ---Ok ty_C = fmap GM.typeForm (Look.lookupFunType gr am id)
ty_C = err error (\x -> x) $ fmap GM.typeForm (Look.lookupFunType gr am id)
!funs_cnt' = let (s_funid, e_funid) = bounds funs0
in funs_cnt+(e_funid-s_funid+1)
!(fid_cnt',crc',prods')
= foldl' (toProd lindefs ty_C funs_cnt)
(fid_cnt,crc,prods) prods0
funs' = foldl' (toCncFun funs_cnt (m,id)) funs (assocs funs0)
in mkCncFuns cdefs fid_cnt' funs_cnt' funs' lindefs crc' prods'
mkCncFuns (_ :cdefs) fid_cnt funs_cnt funs lindefs crc prods =
mkCncFuns cdefs fid_cnt funs_cnt funs lindefs crc prods
toProd lindefs (ctxt_C,res_C,_) offs st (Production fid0 funid0 args0) =
let !((fid_cnt,crc,prods),args) = mapAccumL mkArg st (zip ctxt_C args0)
set0 = Set.fromList (map (C.PApply (offs+funid0)) (sequence args))
fid = mkFId res_C fid0
!prods' = case IntMap.lookup fid prods of
Just set -> IntMap.insert fid (Set.union set0 set) prods
Nothing -> IntMap.insert fid set0 prods
in (fid_cnt,crc,prods')
where
mkArg st@(fid_cnt,crc,prods) ((_,_,ty),fid0s ) =
case fid0s of
[fid0] -> (st,map (flip C.PArg (mkFId arg_C fid0)) ctxt)
fid0s -> case Map.lookup fids crc of
Just fid -> (st,map (flip C.PArg fid) ctxt)
Nothing -> let !crc' = Map.insert fids fid_cnt crc
!prods' = IntMap.insert fid_cnt (Set.fromList (map C.PCoerce fids)) prods
in ((fid_cnt+1,crc',prods'),map (flip C.PArg fid_cnt) ctxt)
where
(hargs_C,arg_C) = GM.catSkeleton ty
ctxt = mapM (mkCtxt lindefs) hargs_C
fids = map (mkFId arg_C) fid0s
mkLinDefId id = prefixIdent "lindef " id
toLinDef res offs lindefs (Production fid0 funid0 args) =
if args == [[fidVar]]
then IntMap.insertWith (++) fid [offs+funid0] lindefs
else lindefs
where
fid = mkFId res fid0
toLinRef res offs linrefs (Production fid0 funid0 [fargs]) =
if fid0 == fidVar
then foldr (\fid -> IntMap.insertWith (++) fid [offs+funid0]) linrefs fids
else linrefs
where
fids = map (mkFId res) fargs
mkFId (_,cat) fid0 =
case Map.lookup (i2i cat) cnccats of
Just (C.CncCat s e _) -> s+fid0
Nothing -> error ("GrammarToPGF.mkFId: missing category "++showIdent cat)
mkCtxt lindefs (_,cat) =
case Map.lookup (i2i cat) cnccats of
Just (C.CncCat s e _) -> [(C.fidVar,fid) | fid <- [s..e], Just _ <- [IntMap.lookup fid lindefs]]
Nothing -> error "GrammarToPGF.mkCtxt failed"
toCncFun offs (m,id) funs (funid0,lins0) =
let mseqs = case lookupModule gr m of
Ok (ModInfo{mseqs=Just mseqs}) -> mseqs
_ -> ex_seqs
in (offs+funid0,C.CncFun (i2i id) (amap (newIndex mseqs) lins0)):funs
where
newIndex mseqs i = binSearch (mseqs ! i) seqs (bounds seqs)
binSearch v arr (i,j)
| i <= j = case ciCmp v (arr ! k) of
LT -> binSearch v arr (i,k-1)
EQ -> k
GT -> binSearch v arr (k+1,j)
| otherwise = error "binSearch"
where
k = (i+j) `div` 2
genPrintNames cdefs =
Map.fromAscList [(i2i id, name) | ((m,id),info) <- cdefs, name <- prn info]
where
prn (CncFun _ _ (Just (L _ tr)) _) = [flatten tr]
prn (CncCat _ _ _ (Just (L _ tr)) _) = [flatten tr]
prn _ = []
flatten (K s) = s
flatten (Alts x _) = flatten x
flatten (C x y) = flatten x +++ flatten y
mkArray lst = listArray (0,length lst-1) lst
mkMapArray map = array (0,Map.size map-1) [(v,k) | (k,v) <- Map.toList map]
-- (an,abs) <- mkAbstr am
-- cncs <- mapM mkConcr (allConcretes gr am)
-- -- return $ updateProductionIndices (D.PGF Map.empty an abs (Map.fromList cncs))
-- return $ D.PGF Map.empty an abs (Map.fromList cncs)
-- where
-- cenv = resourceValues opts gr
--
-- mkAbstr am = return (mi2i am, D.Abstr flags funs cats)
-- where
-- aflags = err (const noOptions) mflags (lookupModule gr am)
--
-- adefs =
-- [((cPredefAbs,c), AbsCat (Just (L NoLoc []))) | c <- [cFloat,cInt,cString]] ++
-- Look.allOrigInfos gr am
--
-- flags = Map.fromList [(mkCId f,x) | (f,x) <- optionsPGF aflags]
--
-- funs = Map.fromList [(i2i f, (mkType [] ty, arity, mkDef gr arity mdef, 0)) |
-- ((m,f),AbsFun (Just (L _ ty)) ma mdef _) <- adefs,
-- let arity = mkArity ma mdef ty]
--
-- cats = Map.fromList [(i2i c, (snd (mkContext [] cont),catfuns c, 0)) |
-- ((m,c),AbsCat (Just (L _ cont))) <- adefs]
--
-- catfuns cat =
-- [(0,i2i f) | ((m,f),AbsFun (Just (L _ ty)) _ _ (Just True)) <- adefs, snd (GM.valCat ty) == cat]
--
-- mkConcr cm = do
-- let cflags = err (const noOptions) mflags (lookupModule gr cm)
-- ciCmp | flag optCaseSensitive cflags = compare
-- | otherwise = C.compareCaseInsensitve
--
-- (ex_seqs,cdefs) <- addMissingPMCFGs
-- Map.empty
-- ([((cPredefAbs,c), CncCat (Just (L NoLoc GM.defLinType)) Nothing Nothing Nothing Nothing) | c <- [cInt,cFloat,cString]] ++
-- Look.allOrigInfos gr cm)
--
-- let flags = Map.fromList [(mkCId f,x) | (f,x) <- optionsPGF cflags]
--
-- seqs = (mkArray . C.sortNubBy ciCmp . concat) $
-- (Map.keys ex_seqs : [maybe [] elems (mseqs mi) | (m,mi) <- allExtends gr cm])
--
-- ex_seqs_arr = mkMapArray ex_seqs :: Array SeqId Sequence
--
-- !(!fid_cnt1,!cnccats) = genCncCats gr am cm cdefs
-- !(!fid_cnt2,!productions,!lindefs,!linrefs,!cncfuns)
-- = genCncFuns gr am cm ex_seqs_arr ciCmp seqs cdefs fid_cnt1 cnccats
--
-- printnames = genPrintNames cdefs
-- return (mi2i cm, D.Concr flags
-- printnames
-- cncfuns
-- lindefs
-- linrefs
-- seqs
-- productions
-- IntMap.empty
-- Map.empty
-- cnccats
-- IntMap.empty
-- fid_cnt2)
-- where
-- -- if some module was compiled with -no-pmcfg, then
-- -- we have to create the PMCFG code just before linking
-- addMissingPMCFGs seqs [] = return (seqs,[])
-- addMissingPMCFGs seqs (((m,id), info):is) = do
-- (seqs,info) <- addPMCFG opts gr cenv Nothing am cm seqs id info
-- (seqs,is ) <- addMissingPMCFGs seqs is
-- return (seqs, ((m,id), info) : is)
--
-- i2i :: Ident -> CId
-- i2i = utf8CId . ident2utf8
--
-- mi2i :: ModuleName -> CId
-- mi2i (MN i) = i2i i
--
-- mkType :: [Ident] -> A.Type -> C.Type
-- mkType scope t =
-- case GM.typeForm t of
-- (hyps,(_,cat),args) -> let (scope',hyps') = mkContext scope hyps
-- in C.DTyp hyps' (i2i cat) (map (mkExp scope') args)
--
-- mkExp :: [Ident] -> A.Term -> C.Expr
-- mkExp scope t =
-- case t of
-- Q (_,c) -> C.EFun (i2i c)
-- QC (_,c) -> C.EFun (i2i c)
-- Vr x -> case lookup x (zip scope [0..]) of
-- Just i -> C.EVar i
-- Nothing -> C.EMeta 0
-- Abs b x t-> C.EAbs b (i2i x) (mkExp (x:scope) t)
-- App t1 t2-> C.EApp (mkExp scope t1) (mkExp scope t2)
-- EInt i -> C.ELit (C.LInt (fromIntegral i))
-- EFloat f -> C.ELit (C.LFlt f)
-- K s -> C.ELit (C.LStr s)
-- Meta i -> C.EMeta i
-- _ -> C.EMeta 0
--
-- mkPatt scope p =
-- case p of
-- A.PP (_,c) ps->let (scope',ps') = mapAccumL mkPatt scope ps
-- in (scope',C.PApp (i2i c) ps')
-- A.PV x -> (x:scope,C.PVar (i2i x))
-- A.PAs x p -> let (scope',p') = mkPatt scope p
-- in (x:scope',C.PAs (i2i x) p')
-- A.PW -> ( scope,C.PWild)
-- A.PInt i -> ( scope,C.PLit (C.LInt (fromIntegral i)))
-- A.PFloat f -> ( scope,C.PLit (C.LFlt f))
-- A.PString s -> ( scope,C.PLit (C.LStr s))
-- A.PImplArg p-> let (scope',p') = mkPatt scope p
-- in (scope',C.PImplArg p')
-- A.PTilde t -> ( scope,C.PTilde (mkExp scope t))
--
-- mkContext :: [Ident] -> A.Context -> ([Ident],[C.Hypo])
-- mkContext scope hyps = mapAccumL (\scope (bt,x,ty) -> let ty' = mkType scope ty
-- in if x == identW
-- then ( scope,(bt,i2i x,ty'))
-- else (x:scope,(bt,i2i x,ty'))) scope hyps
--
-- mkDef gr arity (Just eqs) = Just ([C.Equ ps' (mkExp scope' e) | L _ (ps,e) <- eqs, let (scope',ps') = mapAccumL mkPatt [] ps]
-- ,generateByteCode gr arity eqs
-- )
-- mkDef gr arity Nothing = Nothing
--
-- mkArity (Just a) _ ty = a -- known arity, i.e. defined function
-- mkArity Nothing (Just _) ty = 0 -- defined function with no arity - must be an axiom
-- mkArity Nothing _ ty = let (ctxt, _, _) = GM.typeForm ty -- constructor
-- in length ctxt
--
-- genCncCats gr am cm cdefs =
-- let (index,cats) = mkCncCats 0 cdefs
-- in (index, Map.fromList cats)
-- where
-- mkCncCats index [] = (index,[])
-- mkCncCats index (((m,id),CncCat (Just (L _ lincat)) _ _ _ _):cdefs)
-- | id == cInt =
-- let cc = pgfCncCat gr lincat fidInt
-- (index',cats) = mkCncCats index cdefs
-- in (index', (i2i id,cc) : cats)
-- | id == cFloat =
-- let cc = pgfCncCat gr lincat fidFloat
-- (index',cats) = mkCncCats index cdefs
-- in (index', (i2i id,cc) : cats)
-- | id == cString =
-- let cc = pgfCncCat gr lincat fidString
-- (index',cats) = mkCncCats index cdefs
-- in (index', (i2i id,cc) : cats)
-- | otherwise =
-- let cc@(C.CncCat _s e _) = pgfCncCat gr lincat index
-- (index',cats) = mkCncCats (e+1) cdefs
-- in (index', (i2i id,cc) : cats)
-- mkCncCats index (_ :cdefs) = mkCncCats index cdefs
--
-- genCncFuns :: Grammar
-- -> ModuleName
-- -> ModuleName
-- -> Array SeqId Sequence
-- -> (Sequence -> Sequence -> Ordering)
-- -> Array SeqId Sequence
-- -> [(QIdent, Info)]
-- -> FId
-- -> Map.Map CId D.CncCat
-- -> (FId,
-- IntMap.IntMap (Set.Set D.Production),
-- IntMap.IntMap [FunId],
-- IntMap.IntMap [FunId],
-- Array FunId D.CncFun)
-- genCncFuns gr am cm ex_seqs ciCmp seqs cdefs fid_cnt cnccats =
-- let (fid_cnt1,funs_cnt1,funs1,lindefs,linrefs) = mkCncCats cdefs fid_cnt 0 [] IntMap.empty IntMap.empty
-- (fid_cnt2,funs_cnt2,funs2,prods) = mkCncFuns cdefs fid_cnt1 funs_cnt1 funs1 lindefs Map.empty IntMap.empty
-- in (fid_cnt2,prods,lindefs,linrefs,array (0,funs_cnt2-1) funs2)
-- where
-- mkCncCats [] fid_cnt funs_cnt funs lindefs linrefs =
-- (fid_cnt,funs_cnt,funs,lindefs,linrefs)
-- mkCncCats (((m,id),CncCat _ _ _ _ (Just (PMCFG prods0 funs0))):cdefs) fid_cnt funs_cnt funs lindefs linrefs =
-- let !funs_cnt' = let (s_funid, e_funid) = bounds funs0
-- in funs_cnt+(e_funid-s_funid+1)
-- lindefs' = foldl' (toLinDef (am,id) funs_cnt) lindefs prods0
-- linrefs' = foldl' (toLinRef (am,id) funs_cnt) linrefs prods0
-- funs' = foldl' (toCncFun funs_cnt (m,mkLinDefId id)) funs (assocs funs0)
-- in mkCncCats cdefs fid_cnt funs_cnt' funs' lindefs' linrefs'
-- mkCncCats (_ :cdefs) fid_cnt funs_cnt funs lindefs linrefs =
-- mkCncCats cdefs fid_cnt funs_cnt funs lindefs linrefs
--
-- mkCncFuns [] fid_cnt funs_cnt funs lindefs crc prods =
-- (fid_cnt,funs_cnt,funs,prods)
-- mkCncFuns (((m,id),CncFun _ _ _ (Just (PMCFG prods0 funs0))):cdefs) fid_cnt funs_cnt funs lindefs crc prods =
-- let ---Ok ty_C = fmap GM.typeForm (Look.lookupFunType gr am id)
-- ty_C = err error (\x -> x) $ fmap GM.typeForm (Look.lookupFunType gr am id)
-- !funs_cnt' = let (s_funid, e_funid) = bounds funs0
-- in funs_cnt+(e_funid-s_funid+1)
-- !(fid_cnt',crc',prods')
-- = foldl' (toProd lindefs ty_C funs_cnt)
-- (fid_cnt,crc,prods) prods0
-- funs' = foldl' (toCncFun funs_cnt (m,id)) funs (assocs funs0)
-- in mkCncFuns cdefs fid_cnt' funs_cnt' funs' lindefs crc' prods'
-- mkCncFuns (_ :cdefs) fid_cnt funs_cnt funs lindefs crc prods =
-- mkCncFuns cdefs fid_cnt funs_cnt funs lindefs crc prods
--
-- toProd lindefs (ctxt_C,res_C,_) offs st (Production fid0 funid0 args0) =
-- let !((fid_cnt,crc,prods),args) = mapAccumL mkArg st (zip ctxt_C args0)
-- set0 = Set.fromList (map (C.PApply (offs+funid0)) (sequence args))
-- fid = mkFId res_C fid0
-- !prods' = case IntMap.lookup fid prods of
-- Just set -> IntMap.insert fid (Set.union set0 set) prods
-- Nothing -> IntMap.insert fid set0 prods
-- in (fid_cnt,crc,prods')
-- where
-- mkArg st@(fid_cnt,crc,prods) ((_,_,ty),fid0s ) =
-- case fid0s of
-- [fid0] -> (st,map (flip C.PArg (mkFId arg_C fid0)) ctxt)
-- fid0s -> case Map.lookup fids crc of
-- Just fid -> (st,map (flip C.PArg fid) ctxt)
-- Nothing -> let !crc' = Map.insert fids fid_cnt crc
-- !prods' = IntMap.insert fid_cnt (Set.fromList (map C.PCoerce fids)) prods
-- in ((fid_cnt+1,crc',prods'),map (flip C.PArg fid_cnt) ctxt)
-- where
-- (hargs_C,arg_C) = GM.catSkeleton ty
-- ctxt = mapM (mkCtxt lindefs) hargs_C
-- fids = map (mkFId arg_C) fid0s
--
-- mkLinDefId id = prefixIdent "lindef " id
--
-- toLinDef res offs lindefs (Production fid0 funid0 args) =
-- if args == [[fidVar]]
-- then IntMap.insertWith (++) fid [offs+funid0] lindefs
-- else lindefs
-- where
-- fid = mkFId res fid0
--
-- toLinRef res offs linrefs (Production fid0 funid0 [fargs]) =
-- if fid0 == fidVar
-- then foldr (\fid -> IntMap.insertWith (++) fid [offs+funid0]) linrefs fids
-- else linrefs
-- where
-- fids = map (mkFId res) fargs
--
-- mkFId (_,cat) fid0 =
-- case Map.lookup (i2i cat) cnccats of
-- Just (C.CncCat s e _) -> s+fid0
-- Nothing -> error ("GrammarToPGF.mkFId: missing category "++showIdent cat)
--
-- mkCtxt lindefs (_,cat) =
-- case Map.lookup (i2i cat) cnccats of
-- Just (C.CncCat s e _) -> [(C.fidVar,fid) | fid <- [s..e], Just _ <- [IntMap.lookup fid lindefs]]
-- Nothing -> error "GrammarToPGF.mkCtxt failed"
--
-- toCncFun offs (m,id) funs (funid0,lins0) =
-- let mseqs = case lookupModule gr m of
-- Ok (ModInfo{mseqs=Just mseqs}) -> mseqs
-- _ -> ex_seqs
-- in (offs+funid0,C.CncFun (i2i id) (amap (newIndex mseqs) lins0)):funs
-- where
-- newIndex mseqs i = binSearch (mseqs ! i) seqs (bounds seqs)
--
-- binSearch v arr (i,j)
-- | i <= j = case ciCmp v (arr ! k) of
-- LT -> binSearch v arr (i,k-1)
-- EQ -> k
-- GT -> binSearch v arr (k+1,j)
-- | otherwise = error "binSearch"
-- where
-- k = (i+j) `div` 2
--
-- genPrintNames cdefs =
-- Map.fromAscList [(i2i id, name) | ((m,id),info) <- cdefs, name <- prn info]
-- where
-- prn (CncFun _ _ (Just (L _ tr)) _) = [flatten tr]
-- prn (CncCat _ _ _ (Just (L _ tr)) _) = [flatten tr]
-- prn _ = []
--
-- flatten (K s) = s
-- flatten (Alts x _) = flatten x
-- flatten (C x y) = flatten x +++ flatten y
--
-- mkArray lst = listArray (0,length lst-1) lst
-- mkMapArray map = array (0,Map.size map-1) [(v,k) | (k,v) <- Map.toList map]

View File

@@ -3,6 +3,8 @@ module GF.Compiler (mainGFC, linkGrammars, writePGF, writeOutputs) where
import PGF
import PGF.Internal(concretes,optimizePGF,unionPGF)
import PGF.Internal(putSplitAbs,encodeFile,runPut)
import LPGF(LPGF)
import qualified LPGF
import GF.Compile as S(batchCompile,link,linkl,srcAbsName)
import GF.CompileInParallel as P(parallelBatchCompile)
import GF.Compile.Export
@@ -97,10 +99,8 @@ compileSourceFiles opts fs =
-- recreated. Calls 'writePGF' and 'writeOutputs'.
linkGrammars :: Options -> (UTCTime,[(ModuleName, Grammar)]) -> IOE ()
linkGrammars opts (_,cnc_grs) | FmtLPGF `elem` flag optOutputFormats opts = do
pgfs <- mapM (linkl opts) cnc_grs
let pgf0 = foldl1 unionPGF pgfs
writePGF opts pgf0
putStrLn "LPGF"
lpgf <- linkl opts (head cnc_grs)
writeLPGF opts lpgf
linkGrammars opts (t_src,~cnc_grs@(~(cnc,gr):_)) =
do let abs = render (srcAbsName gr cnc)
pgfFile = outputPath opts (grammarName' opts abs<.>"pgf")
@@ -186,6 +186,12 @@ writePGF opts pgf =
let outfile = outputPath opts (showCId (fst cnc) <.> "pgf_c")
writing opts outfile $ encodeFile outfile cnc
writeLPGF :: Options -> LPGF -> IOE ()
writeLPGF opts lpgf = do
let
grammarName = fromMaybe (showCId (LPGF.abstractName lpgf)) (flag optName opts)
outfile = outputPath opts (grammarName <.> "lpgf")
writing opts outfile $ liftIO $ LPGF.encodeFile outfile lpgf
writeOutput :: Options -> FilePath-> String -> IOE ()
writeOutput opts file str = writing opts path $ writeUTF8File path str

199
src/runtime/haskell/LPGF.hs Normal file
View File

@@ -0,0 +1,199 @@
-- | Linearisation-only PGF format
-- Closely follows description in Section 2 of Angelov, Bringert, Ranta (2009)
-- "PGF: A Portable Run-Time Format for Type-Theoretical Grammars"
module LPGF (
LPGF,
abstractName,
linearize, linearizeConcr,
readLPGF,
-- internal/testing
encodeFile,
zero
) where
import PGF (Language, readLanguage, showLanguage)
import PGF.CId
import PGF.Tree
import Control.Monad (forM_)
import qualified Data.Map as Map
import Text.Printf (printf)
-- | Linearisation-only PGF
data LPGF = LPGF {
absname :: CId,
abstract :: Abstr,
concretes :: Map.Map CId Concr
} deriving (Read, Show)
-- | Abstract syntax
data Abstr = Abstr {
cats :: Map.Map CId (),
funs :: Map.Map CId Type
} deriving (Read, Show)
-- | Concrete syntax
data Concr = Concr {
lincats :: Map.Map CId LinType, -- ^ assigning a linearization type to each category
lins :: Map.Map CId LinFun -- ^ assigning a linearization function to each function
} deriving (Read, Show)
-- | Abstract function type
data Type = Type [CId] CId
deriving (Read, Show)
-- | Linearisation type
data LinType =
LTStr
| LTInt Int
| LTProduct [LinType]
deriving (Read, Show)
-- | Linearisation function
data LinFun =
LFEmpty
| LFToken String
| LFConcat LinFun LinFun
| LFInt Int
| LFTuple [LinFun]
| LFProjection LinFun LinFun -- ^ In order for the projection to be well-formed, t1 must be a tuple and t2 an integer within the bounds of the size of the tuple
| LFArgument Int
deriving (Read, Show)
abstractName :: LPGF -> CId
abstractName = absname
encodeFile :: FilePath -> LPGF -> IO ()
encodeFile path lpgf = writeFile path (show lpgf)
readLPGF :: FilePath -> IO LPGF
readLPGF path = read <$> readFile path
-- | Helper for building concat trees
mkConcat :: [LinFun] -> LinFun
mkConcat [] = LFEmpty
mkConcat [x] = x
mkConcat xs = foldl1 LFConcat xs
-- | Main linearize function
linearize :: LPGF -> Language -> Tree -> String
linearize lpgf lang =
case Map.lookup lang (concretes lpgf) of
Just concr -> linearizeConcr concr
Nothing -> error $ printf "Unknown language: %s" (showCId lang)
-- | Language-specific linearize function
-- Section 2.5
linearizeConcr :: Concr -> Tree -> String
linearizeConcr concr tree = lin2string $ lin tree
where
lin :: Tree -> LinFun
lin tree = case tree of
Fun f as -> v
where
Just t = Map.lookup f (lins concr)
ts = map lin as
v = eval ts t
x -> error $ printf "Cannot lin %s" (prTree x)
-- | Evaluation context is a sequence of terms
type Context = [LinFun]
-- | Operational semantics, Table 2
eval :: Context -> LinFun -> LinFun
eval cxt t = case t of
LFEmpty -> LFEmpty
LFToken tok -> LFToken tok
LFConcat s t -> LFConcat v w
where
v = eval cxt s
w = eval cxt t
LFInt i -> LFInt i
LFTuple ts -> LFTuple vs
where vs = map (eval cxt) ts
LFProjection t u -> vs !! (i-1)
where
LFTuple vs = eval cxt t
LFInt i = eval cxt u
LFArgument i -> cxt !! (i-1)
-- | Turn concrete syntax terms into an actual string
lin2string :: LinFun -> String
lin2string l = case l of
LFEmpty -> ""
LFToken tok -> tok
LFConcat l1 l2 -> unwords [lin2string l1, lin2string l2]
x -> printf "[%s]" (show x)
---
main :: IO ()
main =
forM_ [tree1, tree2, tree3] $ \tree -> do
putStrLn (prTree tree)
forM_ (Map.toList (concretes zero)) $ \(lang,concr) ->
printf "%s: %s\n" (showLanguage lang) (linearizeConcr concr tree)
putStrLn ""
-- Pred John Walk
tree1 :: Tree
tree1 = Fun (mkCId "Pred") [Fun (mkCId "John") [], Fun (mkCId "Walk") []]
-- Pred We Walk
tree2 :: Tree
tree2 = Fun (mkCId "Pred") [Fun (mkCId "We") [], Fun (mkCId "Walk") []]
-- And (Pred John Walk) (Pred We Walk)
tree3 :: Tree
tree3 = Fun (mkCId "And") [tree1, tree2]
-- Initial LPGF, Figures 6 & 7
zero :: LPGF
zero = LPGF {
absname = mkCId "Zero",
abstract = Abstr {
cats = Map.fromList [
(mkCId "S", ()),
(mkCId "NP", ()),
(mkCId "VP", ())
],
funs = Map.fromList [
(mkCId "And", Type [mkCId "S", mkCId "S"] (mkCId "S")),
(mkCId "Pred", Type [mkCId "NP", mkCId "VP"] (mkCId "S")),
(mkCId "John", Type [] (mkCId "NP")),
(mkCId "We", Type [] (mkCId "NP")),
(mkCId "Walk", Type [] (mkCId "VP"))
]
},
concretes = Map.fromList [
(mkCId "ZeroEng", Concr {
lincats = Map.fromList [
(mkCId "S", LTStr),
(mkCId "NP", LTProduct [LTStr, LTInt 2]),
(mkCId "VP", LTProduct [LTStr, LTStr])
],
lins = Map.fromList [
(mkCId "And", mkConcat [LFArgument 1, LFToken "and", LFArgument 2]),
(mkCId "Pred", mkConcat [LFProjection (LFArgument 1) (LFInt 1), LFProjection (LFArgument 2) (LFProjection (LFArgument 1) (LFInt 2))]),
(mkCId "John", LFTuple [LFToken "John", LFInt 1]),
(mkCId "We", LFTuple [LFToken "we", LFInt 2]),
(mkCId "Walk", LFTuple [LFToken "walks", LFToken "walk"])
]
}),
(mkCId "ZeroGer", Concr {
lincats = Map.fromList [
(mkCId "S", LTStr),
(mkCId "NP", LTProduct [LTStr, LTInt 2, LTInt 3]),
(mkCId "VP", LTProduct [LTProduct [LTStr, LTStr, LTStr], LTProduct [LTStr, LTStr, LTStr]])
],
lins = Map.fromList [
(mkCId "And", mkConcat [LFArgument 1, LFToken "und", LFArgument 2]),
(mkCId "Pred", mkConcat [LFProjection (LFArgument 1) (LFInt 1), LFProjection (LFProjection (LFArgument 2) (LFProjection (LFArgument 1) (LFInt 2))) (LFProjection (LFArgument 1) (LFInt 3))]),
(mkCId "John", LFTuple [LFToken "John", LFInt 1, LFInt 3]),
(mkCId "We", LFTuple [LFToken "wir", LFInt 2, LFInt 1]),
(mkCId "Walk", LFTuple [LFTuple [LFToken "gehe", LFToken "gehst", LFToken "geht"], LFTuple [LFToken "gehen", LFToken "geht", LFToken "gehen"]])
]
})
]
}