From 93b81b9f132b8cdbfe860b631a80bb271181bc3c Mon Sep 17 00:00:00 2001 From: "John J. Camilleri" Date: Fri, 22 Jan 2021 14:07:41 +0100 Subject: [PATCH] Add first version of LPGF datatype, with linearization function and some hardcoded examples --- gf.cabal | 1 + src/compiler/GF/Compile.hs | 4 +- src/compiler/GF/Compile/GrammarToLPGF.hs | 604 ++++++++++++----------- src/compiler/GF/Compiler.hs | 14 +- src/runtime/haskell/LPGF.hs | 199 ++++++++ 5 files changed, 517 insertions(+), 305 deletions(-) create mode 100644 src/runtime/haskell/LPGF.hs diff --git a/gf.cabal b/gf.cabal index 5923a0561..95de3cc95 100644 --- a/gf.cabal +++ b/gf.cabal @@ -107,6 +107,7 @@ Library PGF PGF.Internal PGF.Haskell + LPGF other-modules: PGF.Data diff --git a/src/compiler/GF/Compile.hs b/src/compiler/GF/Compile.hs index 07ffe593f..e07e7ba73 100644 --- a/src/compiler/GF/Compile.hs +++ b/src/compiler/GF/Compile.hs @@ -25,6 +25,7 @@ import GF.Text.Pretty(render,($$),(<+>),nest) import PGF.Internal(optimizePGF) import PGF(PGF,defaultProbabilities,setProbabilities,readProbabilitiesFromFile) +import LPGF(LPGF) -- | Compiles a number of source files and builds a 'PGF' structure for them. -- This is a composition of 'link' and 'batchCompile'. @@ -43,7 +44,8 @@ link opts (cnc,gr) = return $ setProbabilities probs $ if flag optOptimizePGF opts then optimizePGF pgf else pgf -linkl :: Options -> (ModuleName,Grammar) -> IOE PGF +-- | Link a grammar into a 'LPGF' that can be used for linearization only. +linkl :: Options -> (ModuleName,Grammar) -> IOE LPGF linkl opts (cnc,gr) = putPointE Normal opts "linking ... " $ do let abs = srcAbsName gr cnc diff --git a/src/compiler/GF/Compile/GrammarToLPGF.hs b/src/compiler/GF/Compile/GrammarToLPGF.hs index e2bebb52c..87e6f9f5e 100644 --- a/src/compiler/GF/Compile/GrammarToLPGF.hs +++ b/src/compiler/GF/Compile/GrammarToLPGF.hs @@ -1,308 +1,312 @@ -{-# LANGUAGE BangPatterns, FlexibleContexts #-} +-- {-# LANGUAGE BangPatterns, FlexibleContexts #-} module GF.Compile.GrammarToLPGF (mkCanon2lpgf) where +import LPGF + --import GF.Compile.Export -import GF.Compile.GeneratePMCFG -import GF.Compile.GenerateBC - -import PGF(CId,mkCId,utf8CId) -import PGF.Internal(fidInt,fidFloat,fidString,fidVar) -import PGF.Internal(updateProductionIndices) -import qualified PGF.Internal as C -import qualified PGF.Internal as D -import GF.Grammar.Predef +-- import GF.Compile.GeneratePMCFG +-- import GF.Compile.GenerateBC +-- +-- import PGF(CId,mkCId,utf8CId) +-- import PGF.Internal(fidInt,fidFloat,fidString,fidVar) +-- import PGF.Internal(updateProductionIndices) +-- import qualified PGF.Internal as C +-- import qualified PGF.Internal as D +-- import GF.Grammar.Predef import GF.Grammar.Grammar -import qualified GF.Grammar.Lookup as Look -import qualified GF.Grammar as A -import qualified GF.Grammar.Macros as GM - -import GF.Infra.Ident +-- import qualified GF.Grammar.Lookup as Look +-- import qualified GF.Grammar as A +-- import qualified GF.Grammar.Macros as GM +-- +-- import GF.Infra.Ident import GF.Infra.Option import GF.Infra.UseIO (IOE) -import GF.Data.Operations +-- import GF.Data.Operations +-- +-- import Data.List +-- import qualified Data.Set as Set +-- import qualified Data.Map as Map +-- import qualified Data.IntMap as IntMap +-- import Data.Array.IArray -import Data.List -import qualified Data.Set as Set -import qualified Data.Map as Map -import qualified Data.IntMap as IntMap -import Data.Array.IArray +mkCanon2lpgf :: Options -> SourceGrammar -> ModuleName -> IOE LPGF +mkCanon2lpgf opts gr am = + return zero - -mkCanon2lpgf :: Options -> SourceGrammar -> ModuleName -> IOE D.PGF -mkCanon2lpgf opts gr am = do - (an,abs) <- mkAbstr am - cncs <- mapM mkConcr (allConcretes gr am) - return $ updateProductionIndices (D.PGF Map.empty an abs (Map.fromList cncs)) - where - cenv = resourceValues opts gr - - mkAbstr am = return (mi2i am, D.Abstr flags funs cats) - where - aflags = err (const noOptions) mflags (lookupModule gr am) - - adefs = - [((cPredefAbs,c), AbsCat (Just (L NoLoc []))) | c <- [cFloat,cInt,cString]] ++ - Look.allOrigInfos gr am - - flags = Map.fromList [(mkCId f,x) | (f,x) <- optionsPGF aflags] - - funs = Map.fromList [(i2i f, (mkType [] ty, arity, mkDef gr arity mdef, 0)) | - ((m,f),AbsFun (Just (L _ ty)) ma mdef _) <- adefs, - let arity = mkArity ma mdef ty] - - cats = Map.fromList [(i2i c, (snd (mkContext [] cont),catfuns c, 0)) | - ((m,c),AbsCat (Just (L _ cont))) <- adefs] - - catfuns cat = - [(0,i2i f) | ((m,f),AbsFun (Just (L _ ty)) _ _ (Just True)) <- adefs, snd (GM.valCat ty) == cat] - - mkConcr cm = do - let cflags = err (const noOptions) mflags (lookupModule gr cm) - ciCmp | flag optCaseSensitive cflags = compare - | otherwise = C.compareCaseInsensitve - - (ex_seqs,cdefs) <- addMissingPMCFGs - Map.empty - ([((cPredefAbs,c), CncCat (Just (L NoLoc GM.defLinType)) Nothing Nothing Nothing Nothing) | c <- [cInt,cFloat,cString]] ++ - Look.allOrigInfos gr cm) - - let flags = Map.fromList [(mkCId f,x) | (f,x) <- optionsPGF cflags] - - seqs = (mkArray . C.sortNubBy ciCmp . concat) $ - (Map.keys ex_seqs : [maybe [] elems (mseqs mi) | (m,mi) <- allExtends gr cm]) - - ex_seqs_arr = mkMapArray ex_seqs :: Array SeqId Sequence - - !(!fid_cnt1,!cnccats) = genCncCats gr am cm cdefs - !(!fid_cnt2,!productions,!lindefs,!linrefs,!cncfuns) - = genCncFuns gr am cm ex_seqs_arr ciCmp seqs cdefs fid_cnt1 cnccats - - printnames = genPrintNames cdefs - return (mi2i cm, D.Concr flags - printnames - cncfuns - lindefs - linrefs - seqs - productions - IntMap.empty - Map.empty - cnccats - IntMap.empty - fid_cnt2) - where - -- if some module was compiled with -no-pmcfg, then - -- we have to create the PMCFG code just before linking - addMissingPMCFGs seqs [] = return (seqs,[]) - addMissingPMCFGs seqs (((m,id), info):is) = do - (seqs,info) <- addPMCFG opts gr cenv Nothing am cm seqs id info - (seqs,is ) <- addMissingPMCFGs seqs is - return (seqs, ((m,id), info) : is) - -i2i :: Ident -> CId -i2i = utf8CId . ident2utf8 - -mi2i :: ModuleName -> CId -mi2i (MN i) = i2i i - -mkType :: [Ident] -> A.Type -> C.Type -mkType scope t = - case GM.typeForm t of - (hyps,(_,cat),args) -> let (scope',hyps') = mkContext scope hyps - in C.DTyp hyps' (i2i cat) (map (mkExp scope') args) - -mkExp :: [Ident] -> A.Term -> C.Expr -mkExp scope t = - case t of - Q (_,c) -> C.EFun (i2i c) - QC (_,c) -> C.EFun (i2i c) - Vr x -> case lookup x (zip scope [0..]) of - Just i -> C.EVar i - Nothing -> C.EMeta 0 - Abs b x t-> C.EAbs b (i2i x) (mkExp (x:scope) t) - App t1 t2-> C.EApp (mkExp scope t1) (mkExp scope t2) - EInt i -> C.ELit (C.LInt (fromIntegral i)) - EFloat f -> C.ELit (C.LFlt f) - K s -> C.ELit (C.LStr s) - Meta i -> C.EMeta i - _ -> C.EMeta 0 - -mkPatt scope p = - case p of - A.PP (_,c) ps->let (scope',ps') = mapAccumL mkPatt scope ps - in (scope',C.PApp (i2i c) ps') - A.PV x -> (x:scope,C.PVar (i2i x)) - A.PAs x p -> let (scope',p') = mkPatt scope p - in (x:scope',C.PAs (i2i x) p') - A.PW -> ( scope,C.PWild) - A.PInt i -> ( scope,C.PLit (C.LInt (fromIntegral i))) - A.PFloat f -> ( scope,C.PLit (C.LFlt f)) - A.PString s -> ( scope,C.PLit (C.LStr s)) - A.PImplArg p-> let (scope',p') = mkPatt scope p - in (scope',C.PImplArg p') - A.PTilde t -> ( scope,C.PTilde (mkExp scope t)) - -mkContext :: [Ident] -> A.Context -> ([Ident],[C.Hypo]) -mkContext scope hyps = mapAccumL (\scope (bt,x,ty) -> let ty' = mkType scope ty - in if x == identW - then ( scope,(bt,i2i x,ty')) - else (x:scope,(bt,i2i x,ty'))) scope hyps - -mkDef gr arity (Just eqs) = Just ([C.Equ ps' (mkExp scope' e) | L _ (ps,e) <- eqs, let (scope',ps') = mapAccumL mkPatt [] ps] - ,generateByteCode gr arity eqs - ) -mkDef gr arity Nothing = Nothing - -mkArity (Just a) _ ty = a -- known arity, i.e. defined function -mkArity Nothing (Just _) ty = 0 -- defined function with no arity - must be an axiom -mkArity Nothing _ ty = let (ctxt, _, _) = GM.typeForm ty -- constructor - in length ctxt - -genCncCats gr am cm cdefs = - let (index,cats) = mkCncCats 0 cdefs - in (index, Map.fromList cats) - where - mkCncCats index [] = (index,[]) - mkCncCats index (((m,id),CncCat (Just (L _ lincat)) _ _ _ _):cdefs) - | id == cInt = - let cc = pgfCncCat gr lincat fidInt - (index',cats) = mkCncCats index cdefs - in (index', (i2i id,cc) : cats) - | id == cFloat = - let cc = pgfCncCat gr lincat fidFloat - (index',cats) = mkCncCats index cdefs - in (index', (i2i id,cc) : cats) - | id == cString = - let cc = pgfCncCat gr lincat fidString - (index',cats) = mkCncCats index cdefs - in (index', (i2i id,cc) : cats) - | otherwise = - let cc@(C.CncCat _s e _) = pgfCncCat gr lincat index - (index',cats) = mkCncCats (e+1) cdefs - in (index', (i2i id,cc) : cats) - mkCncCats index (_ :cdefs) = mkCncCats index cdefs - -genCncFuns :: Grammar - -> ModuleName - -> ModuleName - -> Array SeqId Sequence - -> (Sequence -> Sequence -> Ordering) - -> Array SeqId Sequence - -> [(QIdent, Info)] - -> FId - -> Map.Map CId D.CncCat - -> (FId, - IntMap.IntMap (Set.Set D.Production), - IntMap.IntMap [FunId], - IntMap.IntMap [FunId], - Array FunId D.CncFun) -genCncFuns gr am cm ex_seqs ciCmp seqs cdefs fid_cnt cnccats = - let (fid_cnt1,funs_cnt1,funs1,lindefs,linrefs) = mkCncCats cdefs fid_cnt 0 [] IntMap.empty IntMap.empty - (fid_cnt2,funs_cnt2,funs2,prods) = mkCncFuns cdefs fid_cnt1 funs_cnt1 funs1 lindefs Map.empty IntMap.empty - in (fid_cnt2,prods,lindefs,linrefs,array (0,funs_cnt2-1) funs2) - where - mkCncCats [] fid_cnt funs_cnt funs lindefs linrefs = - (fid_cnt,funs_cnt,funs,lindefs,linrefs) - mkCncCats (((m,id),CncCat _ _ _ _ (Just (PMCFG prods0 funs0))):cdefs) fid_cnt funs_cnt funs lindefs linrefs = - let !funs_cnt' = let (s_funid, e_funid) = bounds funs0 - in funs_cnt+(e_funid-s_funid+1) - lindefs' = foldl' (toLinDef (am,id) funs_cnt) lindefs prods0 - linrefs' = foldl' (toLinRef (am,id) funs_cnt) linrefs prods0 - funs' = foldl' (toCncFun funs_cnt (m,mkLinDefId id)) funs (assocs funs0) - in mkCncCats cdefs fid_cnt funs_cnt' funs' lindefs' linrefs' - mkCncCats (_ :cdefs) fid_cnt funs_cnt funs lindefs linrefs = - mkCncCats cdefs fid_cnt funs_cnt funs lindefs linrefs - - mkCncFuns [] fid_cnt funs_cnt funs lindefs crc prods = - (fid_cnt,funs_cnt,funs,prods) - mkCncFuns (((m,id),CncFun _ _ _ (Just (PMCFG prods0 funs0))):cdefs) fid_cnt funs_cnt funs lindefs crc prods = - let ---Ok ty_C = fmap GM.typeForm (Look.lookupFunType gr am id) - ty_C = err error (\x -> x) $ fmap GM.typeForm (Look.lookupFunType gr am id) - !funs_cnt' = let (s_funid, e_funid) = bounds funs0 - in funs_cnt+(e_funid-s_funid+1) - !(fid_cnt',crc',prods') - = foldl' (toProd lindefs ty_C funs_cnt) - (fid_cnt,crc,prods) prods0 - funs' = foldl' (toCncFun funs_cnt (m,id)) funs (assocs funs0) - in mkCncFuns cdefs fid_cnt' funs_cnt' funs' lindefs crc' prods' - mkCncFuns (_ :cdefs) fid_cnt funs_cnt funs lindefs crc prods = - mkCncFuns cdefs fid_cnt funs_cnt funs lindefs crc prods - - toProd lindefs (ctxt_C,res_C,_) offs st (Production fid0 funid0 args0) = - let !((fid_cnt,crc,prods),args) = mapAccumL mkArg st (zip ctxt_C args0) - set0 = Set.fromList (map (C.PApply (offs+funid0)) (sequence args)) - fid = mkFId res_C fid0 - !prods' = case IntMap.lookup fid prods of - Just set -> IntMap.insert fid (Set.union set0 set) prods - Nothing -> IntMap.insert fid set0 prods - in (fid_cnt,crc,prods') - where - mkArg st@(fid_cnt,crc,prods) ((_,_,ty),fid0s ) = - case fid0s of - [fid0] -> (st,map (flip C.PArg (mkFId arg_C fid0)) ctxt) - fid0s -> case Map.lookup fids crc of - Just fid -> (st,map (flip C.PArg fid) ctxt) - Nothing -> let !crc' = Map.insert fids fid_cnt crc - !prods' = IntMap.insert fid_cnt (Set.fromList (map C.PCoerce fids)) prods - in ((fid_cnt+1,crc',prods'),map (flip C.PArg fid_cnt) ctxt) - where - (hargs_C,arg_C) = GM.catSkeleton ty - ctxt = mapM (mkCtxt lindefs) hargs_C - fids = map (mkFId arg_C) fid0s - - mkLinDefId id = prefixIdent "lindef " id - - toLinDef res offs lindefs (Production fid0 funid0 args) = - if args == [[fidVar]] - then IntMap.insertWith (++) fid [offs+funid0] lindefs - else lindefs - where - fid = mkFId res fid0 - - toLinRef res offs linrefs (Production fid0 funid0 [fargs]) = - if fid0 == fidVar - then foldr (\fid -> IntMap.insertWith (++) fid [offs+funid0]) linrefs fids - else linrefs - where - fids = map (mkFId res) fargs - - mkFId (_,cat) fid0 = - case Map.lookup (i2i cat) cnccats of - Just (C.CncCat s e _) -> s+fid0 - Nothing -> error ("GrammarToPGF.mkFId: missing category "++showIdent cat) - - mkCtxt lindefs (_,cat) = - case Map.lookup (i2i cat) cnccats of - Just (C.CncCat s e _) -> [(C.fidVar,fid) | fid <- [s..e], Just _ <- [IntMap.lookup fid lindefs]] - Nothing -> error "GrammarToPGF.mkCtxt failed" - - toCncFun offs (m,id) funs (funid0,lins0) = - let mseqs = case lookupModule gr m of - Ok (ModInfo{mseqs=Just mseqs}) -> mseqs - _ -> ex_seqs - in (offs+funid0,C.CncFun (i2i id) (amap (newIndex mseqs) lins0)):funs - where - newIndex mseqs i = binSearch (mseqs ! i) seqs (bounds seqs) - - binSearch v arr (i,j) - | i <= j = case ciCmp v (arr ! k) of - LT -> binSearch v arr (i,k-1) - EQ -> k - GT -> binSearch v arr (k+1,j) - | otherwise = error "binSearch" - where - k = (i+j) `div` 2 - -genPrintNames cdefs = - Map.fromAscList [(i2i id, name) | ((m,id),info) <- cdefs, name <- prn info] - where - prn (CncFun _ _ (Just (L _ tr)) _) = [flatten tr] - prn (CncCat _ _ _ (Just (L _ tr)) _) = [flatten tr] - prn _ = [] - - flatten (K s) = s - flatten (Alts x _) = flatten x - flatten (C x y) = flatten x +++ flatten y - -mkArray lst = listArray (0,length lst-1) lst -mkMapArray map = array (0,Map.size map-1) [(v,k) | (k,v) <- Map.toList map] +-- (an,abs) <- mkAbstr am +-- cncs <- mapM mkConcr (allConcretes gr am) +-- -- return $ updateProductionIndices (D.PGF Map.empty an abs (Map.fromList cncs)) +-- return $ D.PGF Map.empty an abs (Map.fromList cncs) +-- where +-- cenv = resourceValues opts gr +-- +-- mkAbstr am = return (mi2i am, D.Abstr flags funs cats) +-- where +-- aflags = err (const noOptions) mflags (lookupModule gr am) +-- +-- adefs = +-- [((cPredefAbs,c), AbsCat (Just (L NoLoc []))) | c <- [cFloat,cInt,cString]] ++ +-- Look.allOrigInfos gr am +-- +-- flags = Map.fromList [(mkCId f,x) | (f,x) <- optionsPGF aflags] +-- +-- funs = Map.fromList [(i2i f, (mkType [] ty, arity, mkDef gr arity mdef, 0)) | +-- ((m,f),AbsFun (Just (L _ ty)) ma mdef _) <- adefs, +-- let arity = mkArity ma mdef ty] +-- +-- cats = Map.fromList [(i2i c, (snd (mkContext [] cont),catfuns c, 0)) | +-- ((m,c),AbsCat (Just (L _ cont))) <- adefs] +-- +-- catfuns cat = +-- [(0,i2i f) | ((m,f),AbsFun (Just (L _ ty)) _ _ (Just True)) <- adefs, snd (GM.valCat ty) == cat] +-- +-- mkConcr cm = do +-- let cflags = err (const noOptions) mflags (lookupModule gr cm) +-- ciCmp | flag optCaseSensitive cflags = compare +-- | otherwise = C.compareCaseInsensitve +-- +-- (ex_seqs,cdefs) <- addMissingPMCFGs +-- Map.empty +-- ([((cPredefAbs,c), CncCat (Just (L NoLoc GM.defLinType)) Nothing Nothing Nothing Nothing) | c <- [cInt,cFloat,cString]] ++ +-- Look.allOrigInfos gr cm) +-- +-- let flags = Map.fromList [(mkCId f,x) | (f,x) <- optionsPGF cflags] +-- +-- seqs = (mkArray . C.sortNubBy ciCmp . concat) $ +-- (Map.keys ex_seqs : [maybe [] elems (mseqs mi) | (m,mi) <- allExtends gr cm]) +-- +-- ex_seqs_arr = mkMapArray ex_seqs :: Array SeqId Sequence +-- +-- !(!fid_cnt1,!cnccats) = genCncCats gr am cm cdefs +-- !(!fid_cnt2,!productions,!lindefs,!linrefs,!cncfuns) +-- = genCncFuns gr am cm ex_seqs_arr ciCmp seqs cdefs fid_cnt1 cnccats +-- +-- printnames = genPrintNames cdefs +-- return (mi2i cm, D.Concr flags +-- printnames +-- cncfuns +-- lindefs +-- linrefs +-- seqs +-- productions +-- IntMap.empty +-- Map.empty +-- cnccats +-- IntMap.empty +-- fid_cnt2) +-- where +-- -- if some module was compiled with -no-pmcfg, then +-- -- we have to create the PMCFG code just before linking +-- addMissingPMCFGs seqs [] = return (seqs,[]) +-- addMissingPMCFGs seqs (((m,id), info):is) = do +-- (seqs,info) <- addPMCFG opts gr cenv Nothing am cm seqs id info +-- (seqs,is ) <- addMissingPMCFGs seqs is +-- return (seqs, ((m,id), info) : is) +-- +-- i2i :: Ident -> CId +-- i2i = utf8CId . ident2utf8 +-- +-- mi2i :: ModuleName -> CId +-- mi2i (MN i) = i2i i +-- +-- mkType :: [Ident] -> A.Type -> C.Type +-- mkType scope t = +-- case GM.typeForm t of +-- (hyps,(_,cat),args) -> let (scope',hyps') = mkContext scope hyps +-- in C.DTyp hyps' (i2i cat) (map (mkExp scope') args) +-- +-- mkExp :: [Ident] -> A.Term -> C.Expr +-- mkExp scope t = +-- case t of +-- Q (_,c) -> C.EFun (i2i c) +-- QC (_,c) -> C.EFun (i2i c) +-- Vr x -> case lookup x (zip scope [0..]) of +-- Just i -> C.EVar i +-- Nothing -> C.EMeta 0 +-- Abs b x t-> C.EAbs b (i2i x) (mkExp (x:scope) t) +-- App t1 t2-> C.EApp (mkExp scope t1) (mkExp scope t2) +-- EInt i -> C.ELit (C.LInt (fromIntegral i)) +-- EFloat f -> C.ELit (C.LFlt f) +-- K s -> C.ELit (C.LStr s) +-- Meta i -> C.EMeta i +-- _ -> C.EMeta 0 +-- +-- mkPatt scope p = +-- case p of +-- A.PP (_,c) ps->let (scope',ps') = mapAccumL mkPatt scope ps +-- in (scope',C.PApp (i2i c) ps') +-- A.PV x -> (x:scope,C.PVar (i2i x)) +-- A.PAs x p -> let (scope',p') = mkPatt scope p +-- in (x:scope',C.PAs (i2i x) p') +-- A.PW -> ( scope,C.PWild) +-- A.PInt i -> ( scope,C.PLit (C.LInt (fromIntegral i))) +-- A.PFloat f -> ( scope,C.PLit (C.LFlt f)) +-- A.PString s -> ( scope,C.PLit (C.LStr s)) +-- A.PImplArg p-> let (scope',p') = mkPatt scope p +-- in (scope',C.PImplArg p') +-- A.PTilde t -> ( scope,C.PTilde (mkExp scope t)) +-- +-- mkContext :: [Ident] -> A.Context -> ([Ident],[C.Hypo]) +-- mkContext scope hyps = mapAccumL (\scope (bt,x,ty) -> let ty' = mkType scope ty +-- in if x == identW +-- then ( scope,(bt,i2i x,ty')) +-- else (x:scope,(bt,i2i x,ty'))) scope hyps +-- +-- mkDef gr arity (Just eqs) = Just ([C.Equ ps' (mkExp scope' e) | L _ (ps,e) <- eqs, let (scope',ps') = mapAccumL mkPatt [] ps] +-- ,generateByteCode gr arity eqs +-- ) +-- mkDef gr arity Nothing = Nothing +-- +-- mkArity (Just a) _ ty = a -- known arity, i.e. defined function +-- mkArity Nothing (Just _) ty = 0 -- defined function with no arity - must be an axiom +-- mkArity Nothing _ ty = let (ctxt, _, _) = GM.typeForm ty -- constructor +-- in length ctxt +-- +-- genCncCats gr am cm cdefs = +-- let (index,cats) = mkCncCats 0 cdefs +-- in (index, Map.fromList cats) +-- where +-- mkCncCats index [] = (index,[]) +-- mkCncCats index (((m,id),CncCat (Just (L _ lincat)) _ _ _ _):cdefs) +-- | id == cInt = +-- let cc = pgfCncCat gr lincat fidInt +-- (index',cats) = mkCncCats index cdefs +-- in (index', (i2i id,cc) : cats) +-- | id == cFloat = +-- let cc = pgfCncCat gr lincat fidFloat +-- (index',cats) = mkCncCats index cdefs +-- in (index', (i2i id,cc) : cats) +-- | id == cString = +-- let cc = pgfCncCat gr lincat fidString +-- (index',cats) = mkCncCats index cdefs +-- in (index', (i2i id,cc) : cats) +-- | otherwise = +-- let cc@(C.CncCat _s e _) = pgfCncCat gr lincat index +-- (index',cats) = mkCncCats (e+1) cdefs +-- in (index', (i2i id,cc) : cats) +-- mkCncCats index (_ :cdefs) = mkCncCats index cdefs +-- +-- genCncFuns :: Grammar +-- -> ModuleName +-- -> ModuleName +-- -> Array SeqId Sequence +-- -> (Sequence -> Sequence -> Ordering) +-- -> Array SeqId Sequence +-- -> [(QIdent, Info)] +-- -> FId +-- -> Map.Map CId D.CncCat +-- -> (FId, +-- IntMap.IntMap (Set.Set D.Production), +-- IntMap.IntMap [FunId], +-- IntMap.IntMap [FunId], +-- Array FunId D.CncFun) +-- genCncFuns gr am cm ex_seqs ciCmp seqs cdefs fid_cnt cnccats = +-- let (fid_cnt1,funs_cnt1,funs1,lindefs,linrefs) = mkCncCats cdefs fid_cnt 0 [] IntMap.empty IntMap.empty +-- (fid_cnt2,funs_cnt2,funs2,prods) = mkCncFuns cdefs fid_cnt1 funs_cnt1 funs1 lindefs Map.empty IntMap.empty +-- in (fid_cnt2,prods,lindefs,linrefs,array (0,funs_cnt2-1) funs2) +-- where +-- mkCncCats [] fid_cnt funs_cnt funs lindefs linrefs = +-- (fid_cnt,funs_cnt,funs,lindefs,linrefs) +-- mkCncCats (((m,id),CncCat _ _ _ _ (Just (PMCFG prods0 funs0))):cdefs) fid_cnt funs_cnt funs lindefs linrefs = +-- let !funs_cnt' = let (s_funid, e_funid) = bounds funs0 +-- in funs_cnt+(e_funid-s_funid+1) +-- lindefs' = foldl' (toLinDef (am,id) funs_cnt) lindefs prods0 +-- linrefs' = foldl' (toLinRef (am,id) funs_cnt) linrefs prods0 +-- funs' = foldl' (toCncFun funs_cnt (m,mkLinDefId id)) funs (assocs funs0) +-- in mkCncCats cdefs fid_cnt funs_cnt' funs' lindefs' linrefs' +-- mkCncCats (_ :cdefs) fid_cnt funs_cnt funs lindefs linrefs = +-- mkCncCats cdefs fid_cnt funs_cnt funs lindefs linrefs +-- +-- mkCncFuns [] fid_cnt funs_cnt funs lindefs crc prods = +-- (fid_cnt,funs_cnt,funs,prods) +-- mkCncFuns (((m,id),CncFun _ _ _ (Just (PMCFG prods0 funs0))):cdefs) fid_cnt funs_cnt funs lindefs crc prods = +-- let ---Ok ty_C = fmap GM.typeForm (Look.lookupFunType gr am id) +-- ty_C = err error (\x -> x) $ fmap GM.typeForm (Look.lookupFunType gr am id) +-- !funs_cnt' = let (s_funid, e_funid) = bounds funs0 +-- in funs_cnt+(e_funid-s_funid+1) +-- !(fid_cnt',crc',prods') +-- = foldl' (toProd lindefs ty_C funs_cnt) +-- (fid_cnt,crc,prods) prods0 +-- funs' = foldl' (toCncFun funs_cnt (m,id)) funs (assocs funs0) +-- in mkCncFuns cdefs fid_cnt' funs_cnt' funs' lindefs crc' prods' +-- mkCncFuns (_ :cdefs) fid_cnt funs_cnt funs lindefs crc prods = +-- mkCncFuns cdefs fid_cnt funs_cnt funs lindefs crc prods +-- +-- toProd lindefs (ctxt_C,res_C,_) offs st (Production fid0 funid0 args0) = +-- let !((fid_cnt,crc,prods),args) = mapAccumL mkArg st (zip ctxt_C args0) +-- set0 = Set.fromList (map (C.PApply (offs+funid0)) (sequence args)) +-- fid = mkFId res_C fid0 +-- !prods' = case IntMap.lookup fid prods of +-- Just set -> IntMap.insert fid (Set.union set0 set) prods +-- Nothing -> IntMap.insert fid set0 prods +-- in (fid_cnt,crc,prods') +-- where +-- mkArg st@(fid_cnt,crc,prods) ((_,_,ty),fid0s ) = +-- case fid0s of +-- [fid0] -> (st,map (flip C.PArg (mkFId arg_C fid0)) ctxt) +-- fid0s -> case Map.lookup fids crc of +-- Just fid -> (st,map (flip C.PArg fid) ctxt) +-- Nothing -> let !crc' = Map.insert fids fid_cnt crc +-- !prods' = IntMap.insert fid_cnt (Set.fromList (map C.PCoerce fids)) prods +-- in ((fid_cnt+1,crc',prods'),map (flip C.PArg fid_cnt) ctxt) +-- where +-- (hargs_C,arg_C) = GM.catSkeleton ty +-- ctxt = mapM (mkCtxt lindefs) hargs_C +-- fids = map (mkFId arg_C) fid0s +-- +-- mkLinDefId id = prefixIdent "lindef " id +-- +-- toLinDef res offs lindefs (Production fid0 funid0 args) = +-- if args == [[fidVar]] +-- then IntMap.insertWith (++) fid [offs+funid0] lindefs +-- else lindefs +-- where +-- fid = mkFId res fid0 +-- +-- toLinRef res offs linrefs (Production fid0 funid0 [fargs]) = +-- if fid0 == fidVar +-- then foldr (\fid -> IntMap.insertWith (++) fid [offs+funid0]) linrefs fids +-- else linrefs +-- where +-- fids = map (mkFId res) fargs +-- +-- mkFId (_,cat) fid0 = +-- case Map.lookup (i2i cat) cnccats of +-- Just (C.CncCat s e _) -> s+fid0 +-- Nothing -> error ("GrammarToPGF.mkFId: missing category "++showIdent cat) +-- +-- mkCtxt lindefs (_,cat) = +-- case Map.lookup (i2i cat) cnccats of +-- Just (C.CncCat s e _) -> [(C.fidVar,fid) | fid <- [s..e], Just _ <- [IntMap.lookup fid lindefs]] +-- Nothing -> error "GrammarToPGF.mkCtxt failed" +-- +-- toCncFun offs (m,id) funs (funid0,lins0) = +-- let mseqs = case lookupModule gr m of +-- Ok (ModInfo{mseqs=Just mseqs}) -> mseqs +-- _ -> ex_seqs +-- in (offs+funid0,C.CncFun (i2i id) (amap (newIndex mseqs) lins0)):funs +-- where +-- newIndex mseqs i = binSearch (mseqs ! i) seqs (bounds seqs) +-- +-- binSearch v arr (i,j) +-- | i <= j = case ciCmp v (arr ! k) of +-- LT -> binSearch v arr (i,k-1) +-- EQ -> k +-- GT -> binSearch v arr (k+1,j) +-- | otherwise = error "binSearch" +-- where +-- k = (i+j) `div` 2 +-- +-- genPrintNames cdefs = +-- Map.fromAscList [(i2i id, name) | ((m,id),info) <- cdefs, name <- prn info] +-- where +-- prn (CncFun _ _ (Just (L _ tr)) _) = [flatten tr] +-- prn (CncCat _ _ _ (Just (L _ tr)) _) = [flatten tr] +-- prn _ = [] +-- +-- flatten (K s) = s +-- flatten (Alts x _) = flatten x +-- flatten (C x y) = flatten x +++ flatten y +-- +-- mkArray lst = listArray (0,length lst-1) lst +-- mkMapArray map = array (0,Map.size map-1) [(v,k) | (k,v) <- Map.toList map] diff --git a/src/compiler/GF/Compiler.hs b/src/compiler/GF/Compiler.hs index 2f043673b..df477ca8d 100644 --- a/src/compiler/GF/Compiler.hs +++ b/src/compiler/GF/Compiler.hs @@ -3,6 +3,8 @@ module GF.Compiler (mainGFC, linkGrammars, writePGF, writeOutputs) where import PGF import PGF.Internal(concretes,optimizePGF,unionPGF) import PGF.Internal(putSplitAbs,encodeFile,runPut) +import LPGF(LPGF) +import qualified LPGF import GF.Compile as S(batchCompile,link,linkl,srcAbsName) import GF.CompileInParallel as P(parallelBatchCompile) import GF.Compile.Export @@ -97,10 +99,8 @@ compileSourceFiles opts fs = -- recreated. Calls 'writePGF' and 'writeOutputs'. linkGrammars :: Options -> (UTCTime,[(ModuleName, Grammar)]) -> IOE () linkGrammars opts (_,cnc_grs) | FmtLPGF `elem` flag optOutputFormats opts = do - pgfs <- mapM (linkl opts) cnc_grs - let pgf0 = foldl1 unionPGF pgfs - writePGF opts pgf0 - putStrLn "LPGF" + lpgf <- linkl opts (head cnc_grs) + writeLPGF opts lpgf linkGrammars opts (t_src,~cnc_grs@(~(cnc,gr):_)) = do let abs = render (srcAbsName gr cnc) pgfFile = outputPath opts (grammarName' opts abs<.>"pgf") @@ -186,6 +186,12 @@ writePGF opts pgf = let outfile = outputPath opts (showCId (fst cnc) <.> "pgf_c") writing opts outfile $ encodeFile outfile cnc +writeLPGF :: Options -> LPGF -> IOE () +writeLPGF opts lpgf = do + let + grammarName = fromMaybe (showCId (LPGF.abstractName lpgf)) (flag optName opts) + outfile = outputPath opts (grammarName <.> "lpgf") + writing opts outfile $ liftIO $ LPGF.encodeFile outfile lpgf writeOutput :: Options -> FilePath-> String -> IOE () writeOutput opts file str = writing opts path $ writeUTF8File path str diff --git a/src/runtime/haskell/LPGF.hs b/src/runtime/haskell/LPGF.hs new file mode 100644 index 000000000..631f8a7d6 --- /dev/null +++ b/src/runtime/haskell/LPGF.hs @@ -0,0 +1,199 @@ +-- | Linearisation-only PGF format +-- Closely follows description in Section 2 of Angelov, Bringert, Ranta (2009) +-- "PGF: A Portable Run-Time Format for Type-Theoretical Grammars" +module LPGF ( + LPGF, + abstractName, + linearize, linearizeConcr, + readLPGF, + + -- internal/testing + encodeFile, + zero + ) where + +import PGF (Language, readLanguage, showLanguage) +import PGF.CId +import PGF.Tree + +import Control.Monad (forM_) +import qualified Data.Map as Map +import Text.Printf (printf) + +-- | Linearisation-only PGF +data LPGF = LPGF { + absname :: CId, + abstract :: Abstr, + concretes :: Map.Map CId Concr +} deriving (Read, Show) + +-- | Abstract syntax +data Abstr = Abstr { + cats :: Map.Map CId (), + funs :: Map.Map CId Type +} deriving (Read, Show) + +-- | Concrete syntax +data Concr = Concr { + lincats :: Map.Map CId LinType, -- ^ assigning a linearization type to each category + lins :: Map.Map CId LinFun -- ^ assigning a linearization function to each function +} deriving (Read, Show) + +-- | Abstract function type +data Type = Type [CId] CId + deriving (Read, Show) + +-- | Linearisation type +data LinType = + LTStr + | LTInt Int + | LTProduct [LinType] + deriving (Read, Show) + +-- | Linearisation function +data LinFun = + LFEmpty + | LFToken String + | LFConcat LinFun LinFun + | LFInt Int + | LFTuple [LinFun] + | LFProjection LinFun LinFun -- ^ In order for the projection to be well-formed, t1 must be a tuple and t2 an integer within the bounds of the size of the tuple + | LFArgument Int + deriving (Read, Show) + +abstractName :: LPGF -> CId +abstractName = absname + +encodeFile :: FilePath -> LPGF -> IO () +encodeFile path lpgf = writeFile path (show lpgf) + +readLPGF :: FilePath -> IO LPGF +readLPGF path = read <$> readFile path + +-- | Helper for building concat trees +mkConcat :: [LinFun] -> LinFun +mkConcat [] = LFEmpty +mkConcat [x] = x +mkConcat xs = foldl1 LFConcat xs + +-- | Main linearize function +linearize :: LPGF -> Language -> Tree -> String +linearize lpgf lang = + case Map.lookup lang (concretes lpgf) of + Just concr -> linearizeConcr concr + Nothing -> error $ printf "Unknown language: %s" (showCId lang) + +-- | Language-specific linearize function +-- Section 2.5 +linearizeConcr :: Concr -> Tree -> String +linearizeConcr concr tree = lin2string $ lin tree + where + lin :: Tree -> LinFun + lin tree = case tree of + Fun f as -> v + where + Just t = Map.lookup f (lins concr) + ts = map lin as + v = eval ts t + x -> error $ printf "Cannot lin %s" (prTree x) + +-- | Evaluation context is a sequence of terms +type Context = [LinFun] + +-- | Operational semantics, Table 2 +eval :: Context -> LinFun -> LinFun +eval cxt t = case t of + LFEmpty -> LFEmpty + LFToken tok -> LFToken tok + LFConcat s t -> LFConcat v w + where + v = eval cxt s + w = eval cxt t + LFInt i -> LFInt i + LFTuple ts -> LFTuple vs + where vs = map (eval cxt) ts + LFProjection t u -> vs !! (i-1) + where + LFTuple vs = eval cxt t + LFInt i = eval cxt u + LFArgument i -> cxt !! (i-1) + +-- | Turn concrete syntax terms into an actual string +lin2string :: LinFun -> String +lin2string l = case l of + LFEmpty -> "" + LFToken tok -> tok + LFConcat l1 l2 -> unwords [lin2string l1, lin2string l2] + x -> printf "[%s]" (show x) + +--- + +main :: IO () +main = + forM_ [tree1, tree2, tree3] $ \tree -> do + putStrLn (prTree tree) + forM_ (Map.toList (concretes zero)) $ \(lang,concr) -> + printf "%s: %s\n" (showLanguage lang) (linearizeConcr concr tree) + putStrLn "" + +-- Pred John Walk +tree1 :: Tree +tree1 = Fun (mkCId "Pred") [Fun (mkCId "John") [], Fun (mkCId "Walk") []] + +-- Pred We Walk +tree2 :: Tree +tree2 = Fun (mkCId "Pred") [Fun (mkCId "We") [], Fun (mkCId "Walk") []] + +-- And (Pred John Walk) (Pred We Walk) +tree3 :: Tree +tree3 = Fun (mkCId "And") [tree1, tree2] + +-- Initial LPGF, Figures 6 & 7 +zero :: LPGF +zero = LPGF { + absname = mkCId "Zero", + abstract = Abstr { + cats = Map.fromList [ + (mkCId "S", ()), + (mkCId "NP", ()), + (mkCId "VP", ()) + ], + funs = Map.fromList [ + (mkCId "And", Type [mkCId "S", mkCId "S"] (mkCId "S")), + (mkCId "Pred", Type [mkCId "NP", mkCId "VP"] (mkCId "S")), + (mkCId "John", Type [] (mkCId "NP")), + (mkCId "We", Type [] (mkCId "NP")), + (mkCId "Walk", Type [] (mkCId "VP")) + ] + }, + concretes = Map.fromList [ + (mkCId "ZeroEng", Concr { + lincats = Map.fromList [ + (mkCId "S", LTStr), + (mkCId "NP", LTProduct [LTStr, LTInt 2]), + (mkCId "VP", LTProduct [LTStr, LTStr]) + ], + lins = Map.fromList [ + (mkCId "And", mkConcat [LFArgument 1, LFToken "and", LFArgument 2]), + (mkCId "Pred", mkConcat [LFProjection (LFArgument 1) (LFInt 1), LFProjection (LFArgument 2) (LFProjection (LFArgument 1) (LFInt 2))]), + (mkCId "John", LFTuple [LFToken "John", LFInt 1]), + (mkCId "We", LFTuple [LFToken "we", LFInt 2]), + (mkCId "Walk", LFTuple [LFToken "walks", LFToken "walk"]) + ] + }), + (mkCId "ZeroGer", Concr { + lincats = Map.fromList [ + (mkCId "S", LTStr), + (mkCId "NP", LTProduct [LTStr, LTInt 2, LTInt 3]), + (mkCId "VP", LTProduct [LTProduct [LTStr, LTStr, LTStr], LTProduct [LTStr, LTStr, LTStr]]) + ], + lins = Map.fromList [ + (mkCId "And", mkConcat [LFArgument 1, LFToken "und", LFArgument 2]), + (mkCId "Pred", mkConcat [LFProjection (LFArgument 1) (LFInt 1), LFProjection (LFProjection (LFArgument 2) (LFProjection (LFArgument 1) (LFInt 2))) (LFProjection (LFArgument 1) (LFInt 3))]), + (mkCId "John", LFTuple [LFToken "John", LFInt 1, LFInt 3]), + (mkCId "We", LFTuple [LFToken "wir", LFInt 2, LFInt 1]), + (mkCId "Walk", LFTuple [LFTuple [LFToken "gehe", LFToken "gehst", LFToken "geht"], LFTuple [LFToken "gehen", LFToken "geht", LFToken "gehen"]]) + ] + }) + ] +}