PGF is now real synchronous PMCFG

This commit is contained in:
krasimir
2010-01-17 21:35:36 +00:00
parent 362f333ebd
commit a039808141
23 changed files with 296 additions and 599 deletions

View File

@@ -43,24 +43,18 @@ library
PGF.Expr PGF.Expr
PGF.Type PGF.Type
PGF.Tree PGF.Tree
PGF.PMCFG
PGF.Paraphrase PGF.Paraphrase
PGF.TypeCheck PGF.TypeCheck
PGF.Binary PGF.Binary
PGF.Morphology PGF.Morphology
PGF.VisualizeTree PGF.VisualizeTree
PGF.Printer
GF.Data.TrieMap GF.Data.TrieMap
GF.Data.Utilities GF.Data.Utilities
GF.Data.SortedList GF.Data.SortedList
GF.Data.ErrM GF.Data.ErrM
GF.Data.Relation GF.Data.Relation
GF.Data.Operations GF.Data.Operations
-- needed only for the on demand generation of PMCFG
GF.Infra.GetOpt
GF.Infra.Option
GF.Data.ErrM
GF.Data.BacktrackM
GF.Compile.GeneratePMCFG
-- not really part of GF but I have changed the original binary library -- not really part of GF but I have changed the original binary library
-- and we have to keep the copy for now. -- and we have to keep the copy for now.
Data.Binary Data.Binary
@@ -141,7 +135,6 @@ executable gf
GF.Compile.Abstract.Compute GF.Compile.Abstract.Compute
GF.Compile.Optimize GF.Compile.Optimize
GF.Compile.SubExOpt GF.Compile.SubExOpt
GF.Compile.OptimizePGF
GF.Compile.ModDeps GF.Compile.ModDeps
GF.Compile.GetGrammar GF.Compile.GetGrammar
GF.Compile.PGFtoHaskell GF.Compile.PGFtoHaskell
@@ -156,7 +149,6 @@ executable gf
PGF.Expr PGF.Expr
PGF.Type PGF.Type
PGF.Tree PGF.Tree
PGF.PMCFG
PGF.Macros PGF.Macros
PGF.Generate PGF.Generate
PGF.Linearize PGF.Linearize
@@ -164,6 +156,7 @@ executable gf
PGF.Paraphrase PGF.Paraphrase
PGF.TypeCheck PGF.TypeCheck
PGF.Binary PGF.Binary
PGF.Printer
GFC GFC
GFI GFI

View File

@@ -213,7 +213,7 @@ langsDemo = langsLang `except` ["Ara","Hin","Ina","Tha"]
langsParse = langs `only` ["Eng"] langsParse = langs `only` ["Eng"]
-- languages for which langs.pgf is built -- languages for which langs.pgf is built
langsPGF = langsLang `except` ["Ara","Bul","Hin","Ron","Tha"] langsPGF = langsLang `except` ["Ara","Hin","Ron","Tha"]
-- languages for which Compatibility exists (to be extended) -- languages for which Compatibility exists (to be extended)
langsCompat = langsLang `only` ["Cat","Eng","Fin","Fre","Ita","Spa","Swe"] langsCompat = langsLang `only` ["Cat","Eng","Fin","Fre","Ita","Spa","Swe"]
@@ -297,15 +297,14 @@ unlexer abstr ls =
-- | Runs the gf executable in compile mode with the given arguments. -- | Runs the gf executable in compile mode with the given arguments.
run_gfc :: PackageDescription -> LocalBuildInfo -> [String] -> IO () run_gfc :: PackageDescription -> LocalBuildInfo -> [String] -> IO ()
run_gfc pkg lbi args = run_gfc pkg lbi args =
do let args' = ["-batch","-gf-lib-path="++rgl_src_dir] ++ filter (not . null) args ++ ["+RTS"] ++ rts_flags ++ ["-RTS"] do let args' = ["-batch","-gf-lib-path="++rgl_src_dir] ++ filter (not . null) args
gf = default_gf pkg lbi gf = default_gf pkg lbi
putStrLn $ "Running: " ++ gf ++ " " ++ unwords (map showArg args') putStrLn $ "Running: " ++ gf ++ " " ++ unwords (map showArg args')
e <- rawSystem gf args' e <- rawSystem gf args'
case e of case e of
ExitSuccess -> return () ExitSuccess -> return ()
ExitFailure i -> die $ "gf exited with exit code: " ++ show i ExitFailure i -> die $ "gf exited with exit code: " ++ show i
where rts_flags = ["-K64M"] where showArg arg = "'" ++ arg ++ "'"
showArg arg = "'" ++ arg ++ "'"
default_gf pkg lbi = buildDir lbi </> exeName' </> exeNameReal default_gf pkg lbi = buildDir lbi </> exeName' </> exeNameReal
where where

View File

@@ -19,6 +19,7 @@ import PGF.VisualizeTree
import PGF.Macros import PGF.Macros
import PGF.Data ---- import PGF.Data ----
import PGF.Morphology import PGF.Morphology
import PGF.Printer
import GF.Compile.Export import GF.Compile.Export
import GF.Infra.Option (noOptions, readOutputFormat, Encoding(..)) import GF.Infra.Option (noOptions, readOutputFormat, Encoding(..))
import GF.Infra.UseIO import GF.Infra.UseIO
@@ -752,22 +753,17 @@ allCommands cod env@(pgf, mos) = Map.fromList [
exec = \opts arg -> do exec = \opts arg -> do
case arg of case arg of
[EFun id] -> case Map.lookup id (funs (abstract pgf)) of [EFun id] -> case Map.lookup id (funs (abstract pgf)) of
Just (ty,_,eqs) -> return $ fromString $ Just fd -> return $ fromString $
render (text "fun" <+> ppCId id <+> colon <+> ppType 0 [] ty $$ render (ppFun id fd)
if null eqs Nothing -> case Map.lookup id (cats (abstract pgf)) of
then empty Just hyps -> do return $ fromString $
else text "def" <+> vcat [let (scope,ds) = mapAccumL (ppPatt 9) [] patts render (ppCat id hyps $$
in ppCId id <+> hsep ds <+> char '=' <+> ppExpr 0 scope res | Equ patts res <- eqs]) if null (functionsToCat pgf id)
Nothing -> case Map.lookup id (cats (abstract pgf)) of then empty
Just hyps -> do return $ fromString $ else space $$
render (text "cat" <+> ppCId id <+> hsep (snd (mapAccumL ppHypo [] hyps)) $$ vcat [ppFun fid (ty,0,[]) | (fid,ty) <- functionsToCat pgf id])
if null (functionsToCat pgf id) Nothing -> do putStrLn ("unknown category of function identifier "++show id)
then empty return void
else space $$
text "fun" <+> vcat [ppCId fid <+> colon <+> ppType 0 [] ty
| (fid,ty) <- functionsToCat pgf id])
Nothing -> do putStrLn ("unknown category of function identifier "++show id)
return void
[e] -> case inferExpr pgf e of [e] -> case inferExpr pgf e of
Left tcErr -> error $ render (ppTcError tcErr) Left tcErr -> error $ render (ppTcError tcErr)
Right (e,ty) -> do putStrLn ("Expression: "++showExpr [] e) Right (e,ty) -> do putStrLn ("Expression: "++showExpr [] e)
@@ -782,8 +778,8 @@ allCommands cod env@(pgf, mos) = Map.fromList [
enc = encodeUnicode cod enc = encodeUnicode cod
par opts s = case optOpenTypes opts of par opts s = case optOpenTypes opts of
[] -> concat [parse pgf lang (optType opts) s | lang <- optLangs opts, canParse pgf lang] [] -> concat [parse pgf lang (optType opts) s | lang <- optLangs opts]
open_typs -> concat [parseWithRecovery pgf lang (optType opts) open_typs s | lang <- optLangs opts, canParse pgf lang] open_typs -> concat [parseWithRecovery pgf lang (optType opts) open_typs s | lang <- optLangs opts]
void = ([],[]) void = ([],[])

View File

@@ -6,7 +6,6 @@ import GF.Compile.Rename
import GF.Compile.CheckGrammar import GF.Compile.CheckGrammar
import GF.Compile.Optimize import GF.Compile.Optimize
import GF.Compile.SubExOpt import GF.Compile.SubExOpt
import GF.Compile.OptimizePGF
import GF.Compile.GrammarToPGF import GF.Compile.GrammarToPGF
import GF.Compile.ReadFiles import GF.Compile.ReadFiles
import GF.Compile.Update import GF.Compile.Update
@@ -54,31 +53,16 @@ compileToPGF opts fs =
link :: Options -> String -> SourceGrammar -> IOE PGF link :: Options -> String -> SourceGrammar -> IOE PGF
link opts cnc gr = do link opts cnc gr = do
let isv = (verbAtLeast opts Normal) let isv = (verbAtLeast opts Normal)
gc1 <- putPointE Normal opts "linking ... " $ putPointE Normal opts "linking ... " $ do
let (abs,gc0) = mkCanon2gfcc opts cnc gr gc0 <- ioeIO (mkCanon2pgf opts cnc gr)
in case checkPGF gc0 of case checkPGF gc0 of
Ok (gc,b) -> do Ok (gc,b) -> do case (isv,b) of
case (isv,b) of (True, True) -> ioeIO $ putStrLn "OK"
(True, True) -> ioeIO $ putStrLn "OK" (False,True) -> return ()
(False,True) -> return () _ -> ioeIO $ putStrLn $ "Corrupted PGF"
_ -> ioeIO $ putStrLn $ "Corrupted PGF" return gc
return gc Bad s -> fail s
Bad s -> fail s
ioeIO $ buildParser opts $ optimize opts gc1
optimize :: Options -> PGF -> PGF
optimize opts = cse . suf
where os = flag optOptimizations opts
cse = if OptCSE `Set.member` os then cseOptimize else id
suf = if OptStem `Set.member` os then suffixOptimize else id
buildParser :: Options -> PGF -> IO PGF
buildParser opts =
case flag optBuildParser opts of
BuildParser -> addParsers opts
DontBuildParser -> return
BuildParserOnDemand -> return . mapConcretes (\cnc -> cnc { cflags = Map.insert (mkCId "parser") "ondemand" (cflags cnc) })
batchCompile :: Options -> [FilePath] -> IOE SourceGrammar batchCompile :: Options -> [FilePath] -> IOE SourceGrammar
batchCompile opts files = do batchCompile opts files = do

View File

@@ -2,10 +2,10 @@ module GF.Compile.Export where
import PGF.CId import PGF.CId
import PGF.Data (PGF(..)) import PGF.Data (PGF(..))
import PGF.Printer
import GF.Compile.PGFtoHaskell import GF.Compile.PGFtoHaskell
import GF.Compile.PGFtoProlog import GF.Compile.PGFtoProlog
import GF.Compile.PGFtoJS import GF.Compile.PGFtoJS
import GF.Compile.PGFPretty
import GF.Infra.Option import GF.Infra.Option
import GF.Speech.CFG import GF.Speech.CFG
import GF.Speech.PGFToCFG import GF.Speech.PGFToCFG
@@ -20,6 +20,7 @@ import GF.Speech.PrRegExp
import Data.Maybe import Data.Maybe
import System.FilePath import System.FilePath
import Text.PrettyPrint
-- top-level access to code generation -- top-level access to code generation
@@ -29,8 +30,7 @@ exportPGF :: Options
-> [(FilePath,String)] -- ^ List of recommended file names and contents. -> [(FilePath,String)] -- ^ List of recommended file names and contents.
exportPGF opts fmt pgf = exportPGF opts fmt pgf =
case fmt of case fmt of
FmtPGFPretty -> multi "txt" prPGFPretty FmtPGFPretty -> multi "txt" (render . ppPGF)
FmtPMCFGPretty -> single "pmcfg" prPMCFGPretty
FmtJavaScript -> multi "js" pgf2js FmtJavaScript -> multi "js" pgf2js
FmtHaskell -> multi "hs" (grammar2haskell opts name) FmtHaskell -> multi "hs" (grammar2haskell opts name)
FmtProlog -> multi "pl" grammar2prolog FmtProlog -> multi "pl" grammar2prolog

View File

@@ -35,24 +35,20 @@ import Control.Exception
-- main conversion function -- main conversion function
convertConcrete :: Options -> Abstr -> CId -> Concr -> IO ParserInfo --convertConcrete :: Options -> Abstr -> CId -> Concr -> IO Concr
convertConcrete opts abs lang cnc = do convertConcrete opts lang flags printnames abs_defs cnc_defs lincats params lin_defs = do
let env0 = emptyGrammarEnv cnc_defs cat_defs params let env0 = emptyGrammarEnv cnc_defs cat_defs params
when (flag optProf opts) $ do when (flag optProf opts) $ do
profileGrammar lang cnc_defs env0 pfrules profileGrammar lang cnc_defs env0 pfrules
let env1 = expandHOAS abs_defs cnc_defs cat_defs lin_defs env0 let env1 = expandHOAS abs_defs cnc_defs cat_defs lin_defs env0
env2 = List.foldl' (convertRule cnc_defs) env1 pfrules env2 = List.foldl' (convertRule cnc_defs) env1 pfrules
return $ getParserInfo env2 return $ getParserInfo flags printnames env2
where where
abs_defs = Map.assocs (funs abs) cat_defs = Map.insert cidVar (S []) lincats
cnc_defs = Map.union (opers cnc) (lins cnc) -- "union big+small most efficient"
cat_defs = Map.insert cidVar (S []) (lincats cnc)
params = paramlincats cnc
lin_defs = lindefs cnc
pfrules = [ pfrules = [
(PFRule id args (0,res) (map findLinType args) (findLinType (0,res)) term) | (PFRule id args (0,res) (map findLinType args) (findLinType (0,res)) term) |
(id, (ty,_,_)) <- abs_defs, let (args,res) = typeSkeleton ty, (id, (ty,_,_)) <- Map.toList abs_defs, let (args,res) = typeSkeleton ty,
term <- maybeToList (Map.lookup id cnc_defs)] term <- maybeToList (Map.lookup id cnc_defs)]
findLinType (_,id) = fromMaybe (error $ "No lincat for " ++ show id) (Map.lookup id cat_defs) findLinType (_,id) = fromMaybe (error $ "No lincat for " ++ show id) (Map.lookup id cat_defs)
@@ -364,7 +360,7 @@ expandHOAS abs_defs cnc_defs lincats lindefs env =
foldl add_varFun (foldl (\env ncat -> add_hoFun (add_hoCat env ncat) ncat) env hoTypes) (Map.keys lincats) foldl add_varFun (foldl (\env ncat -> add_hoFun (add_hoCat env ncat) ncat) env hoTypes) (Map.keys lincats)
where where
hoTypes :: [(Int,CId)] hoTypes :: [(Int,CId)]
hoTypes = sortNub [(n,c) | (_,(ty,_,_)) <- abs_defs hoTypes = sortNub [(n,c) | (_,(ty,_,_)) <- Map.toList abs_defs
, (n,c) <- fst (typeSkeleton ty), n > 0] , (n,c) <- fst (typeSkeleton ty), n > 0]
-- add a range of PMCFG categories for each GF high-order category -- add a range of PMCFG categories for each GF high-order category
@@ -438,16 +434,18 @@ addFCoercion env@(GrammarEnv last_id catSet seqSet funSet crcSet prodSet) sub_fc
Nothing -> let !fcat = last_id+1 Nothing -> let !fcat = last_id+1
in (GrammarEnv fcat catSet seqSet funSet (Map.insert sub_fcats fcat crcSet) prodSet,fcat) in (GrammarEnv fcat catSet seqSet funSet (Map.insert sub_fcats fcat crcSet) prodSet,fcat)
getParserInfo :: GrammarEnv -> ParserInfo getParserInfo :: Map.Map CId String -> Map.Map CId String -> GrammarEnv -> Concr
getParserInfo (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) = getParserInfo flags printnames (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) =
ParserInfo { functions = mkArray funSet Concr { cflags = flags
, sequences = mkArray seqSet , printnames = printnames
, productions = IntMap.union prodSet coercions , functions = mkArray funSet
, pproductions = IntMap.empty , sequences = mkArray seqSet
, lproductions = Map.empty , productions = IntMap.union prodSet coercions
, startCats = maybe Map.empty (Map.map (\(start,end,_,lbls) -> (start,end,lbls))) (IntMap.lookup 0 catSet) , pproductions = IntMap.empty
, totalCats = last_id+1 , lproductions = Map.empty
} , startCats = maybe Map.empty (Map.map (\(start,end,_,lbls) -> (start,end,lbls))) (IntMap.lookup 0 catSet)
, totalCats = last_id+1
}
where where
mkArray map = array (0,Map.size map-1) [(v,k) | (k,v) <- Map.toList map] mkArray map = array (0,Map.size map-1) [(v,k) | (k,v) <- Map.toList map]

View File

@@ -1,11 +1,12 @@
{-# LANGUAGE PatternGuards #-} {-# LANGUAGE PatternGuards #-}
module GF.Compile.GrammarToPGF (mkCanon2gfcc,addParsers) where module GF.Compile.GrammarToPGF (mkCanon2pgf) where
import GF.Compile.Export import GF.Compile.Export
import GF.Compile.GeneratePMCFG import GF.Compile.GeneratePMCFG
import PGF.CId import PGF.CId
import PGF.Macros(updateProductionIndices) import PGF.Macros(updateProductionIndices)
import PGF.Check(checkLin)
import qualified PGF.Macros as CM import qualified PGF.Macros as CM
import qualified PGF.Data as C import qualified PGF.Data as C
import qualified PGF.Data as D import qualified PGF.Data as D
@@ -36,28 +37,22 @@ traceD s t = t
-- the main function: generate PGF from GF. -- the main function: generate PGF from GF.
mkCanon2gfcc :: Options -> String -> SourceGrammar -> (String,D.PGF) mkCanon2pgf :: Options -> String -> SourceGrammar -> IO D.PGF
mkCanon2gfcc opts cnc gr = mkCanon2pgf opts cnc gr = (canon2pgf opts pars . reorder abs . canon2canon opts abs) gr
(showIdent abs, (canon2gfcc opts pars . reorder abs . canon2canon opts abs) gr)
where where
abs = err (const c) id $ M.abstractOfConcrete gr c where c = identC (BS.pack cnc) abs = err (const c) id $ M.abstractOfConcrete gr c where c = identC (BS.pack cnc)
pars = mkParamLincat gr pars = mkParamLincat gr
-- Adds parsers for all concretes
addParsers :: Options -> D.PGF -> IO D.PGF
addParsers opts pgf = do cncs <- sequence [conv lang cnc | (lang,cnc) <- Map.toList (D.concretes pgf)]
return $ updateProductionIndices $ pgf { D.concretes = Map.fromList cncs }
where
conv lang cnc = do pinfo <- convertConcrete opts (D.abstract pgf) lang cnc
return (lang,cnc { D.parser = Just pinfo })
-- Generate PGF from GFCM. -- Generate PGF from GFCM.
-- this assumes a grammar translated by canon2canon -- this assumes a grammar translated by canon2canon
canon2gfcc :: Options -> (Ident -> Ident -> C.Term) -> SourceGrammar -> D.PGF canon2pgf :: Options -> (Ident -> Ident -> C.Term) -> SourceGrammar -> IO D.PGF
canon2gfcc opts pars cgr@(M.MGrammar ((a,abm):cms)) = canon2pgf opts pars cgr@(M.MGrammar ((a,abm):cms)) = do
(if dump opts DumpCanon then trace (render (vcat (map (ppModule Qualified) (M.modules cgr)))) else id) $ if dump opts DumpCanon
D.PGF an cns gflags abs cncs then putStrLn (render (vcat (map (ppModule Qualified) (M.modules cgr))))
else return ()
cncs <- sequence [mkConcr lang (i2i lang) mo | (lang,mo) <- cms]
return (D.PGF an cns gflags abs (Map.fromList cncs))
where where
-- abstract -- abstract
an = (i2i a) an = (i2i a)
@@ -82,13 +77,15 @@ canon2gfcc opts pars cgr@(M.MGrammar ((a,abm):cms)) =
catfuns = Map.fromList catfuns = Map.fromList
[(cat,[f | (f, (C.DTyp _ c _,_,_)) <- lfuns, c==cat]) | (cat,_) <- lcats] [(cat,[f | (f, (C.DTyp _ c _,_,_)) <- lfuns, c==cat]) | (cat,_) <- lcats]
cncs = Map.fromList [mkConcr lang (i2i lang) mo | (lang,mo) <- cms] mkConcr lang0 lang mo = do
mkConcr lang0 lang mo = lins' <- case mapM (checkLin (funs,lins,lincats) lang) (Map.toList lins) of
(lang,D.Concr flags lins opers lincats lindefs printnames params fcfg) Ok x -> return x
Bad msg -> fail msg
cnc <- convertConcrete opts lang flags printnames funs (Map.fromList (map fst lins')) lincats params lindefs
return (lang, cnc)
where where
js = tree2list (M.jments mo) js = tree2list (M.jments mo)
flags = Map.fromList [(mkCId f,x) | (f,x) <- optionsPGF (M.flags mo)] flags = Map.fromList [(mkCId f,x) | (f,x) <- optionsPGF (M.flags mo)]
opers = Map.fromAscList [] -- opers will be created as optimization
utf = id -- trace (show lang0 +++ show flags) $ utf = id -- trace (show lang0 +++ show flags) $
-- if moduleFlag optEncoding (moduleOptions (M.flags mo)) == UTF_8 -- if moduleFlag optEncoding (moduleOptions (M.flags mo)) == UTF_8
-- then id else id -- then id else id

View File

@@ -1,118 +0,0 @@
module GF.Compile.OptimizePGF where
import PGF.CId
import PGF.Data
import PGF.Macros
import GF.Data.Operations
import Data.List
import qualified Data.Map as Map
-- back-end optimization:
-- suffix analysis followed by common subexpression elimination
optPGF :: PGF -> PGF
optPGF = cseOptimize . suffixOptimize
suffixOptimize :: PGF -> PGF
suffixOptimize = mapConcretes opt
where
opt cnc = cnc {
lins = Map.map optTerm (lins cnc),
lindefs = Map.map optTerm (lindefs cnc)
}
cseOptimize :: PGF -> PGF
cseOptimize = mapConcretes subex
-- analyse word form lists into prefix + suffixes
-- suffix sets can later be shared by subex elim
optTerm :: Term -> Term
optTerm tr = case tr of
R ts@(_:_:_) | all isK ts -> mkSuff $ optToks [s | K (KS s) <- ts]
R ts -> R $ map optTerm ts
P t v -> P (optTerm t) v
_ -> tr
where
optToks ss = prf : suffs where
prf = pref (head ss) (tail ss)
suffs = map (drop (length prf)) ss
pref cand ss = case ss of
s1:ss2 -> if isPrefixOf cand s1 then pref cand ss2 else pref (init cand) ss
_ -> cand
isK t = case t of
K (KS _) -> True
_ -> False
mkSuff ("":ws) = R (map (K . KS) ws)
mkSuff (p:ws) = W p (R (map (K . KS) ws))
-- common subexpression elimination
---subex :: [(CId,Term)] -> [(CId,Term)]
subex :: Concr -> Concr
subex cnc = err error id $ do
(tree,_) <- appSTM (getSubtermsMod cnc) (Map.empty,0)
return $ addSubexpConsts tree cnc
type TermList = Map.Map Term (Int,Int) -- number of occs, id
type TermM a = STM (TermList,Int) a
addSubexpConsts :: TermList -> Concr -> Concr
addSubexpConsts tree cnc = cnc {
opers = Map.fromList [(f,recomp f trm) | (f,trm) <- ops],
lins = rec lins,
lindefs = rec lindefs
}
where
ops = [(fid id, trm) | (trm,(_,id)) <- Map.assocs tree]
mkOne (f,trm) = (f, recomp f trm)
recomp f t = case Map.lookup t tree of
Just (_,id) | fid id /= f -> F $ fid id -- not to replace oper itself
_ -> case t of
R ts -> R $ map (recomp f) ts
S ts -> S $ map (recomp f) ts
W s t -> W s (recomp f t)
P t p -> P (recomp f t) (recomp f p)
_ -> t
fid n = mkCId $ "_" ++ show n
rec field = Map.fromAscList [(f,recomp f trm) | (f,trm) <- Map.assocs (field cnc)]
getSubtermsMod :: Concr -> TermM TermList
getSubtermsMod cnc = do
mapM getSubterms (Map.assocs (lins cnc))
mapM getSubterms (Map.assocs (lindefs cnc))
(tree0,_) <- readSTM
return $ Map.filter (\ (nu,_) -> nu > 1) tree0
where
getSubterms (f,trm) = collectSubterms trm >> return ()
collectSubterms :: Term -> TermM ()
collectSubterms t = case t of
R ts -> do
mapM collectSubterms ts
add t
S ts -> do
mapM collectSubterms ts
add t
W s u -> do
collectSubterms u
add t
P p u -> do
collectSubterms p
collectSubterms u
add t
_ -> return ()
where
add t = do
(ts,i) <- readSTM
let
((count,id),next) = case Map.lookup t ts of
Just (nu,id) -> ((nu+1,id), i)
_ -> ((1, i ), i+1)
writeSTM (Map.insert t (count,id) ts, next)

View File

@@ -1,94 +0,0 @@
-- | Print a part of a PGF grammar on the human-readable format used in
-- the paper "PGF: A Portable Run-Time Format for Type-Theoretical Grammars".
module GF.Compile.PGFPretty (prPGFPretty, prPMCFGPretty) where
import PGF.CId
import PGF.Data
import PGF.Macros
import PGF.PMCFG
import GF.Data.Operations
import Data.Map (Map)
import qualified Data.Map as Map
import Text.PrettyPrint.HughesPJ
prPGFPretty :: PGF -> String
prPGFPretty pgf = render $ prAbs (abstract pgf) $$ prAll (prCnc (abstract pgf)) (concretes pgf)
prPMCFGPretty :: PGF -> CId -> String
prPMCFGPretty pgf lang = render $
case lookParser pgf lang of
Nothing -> empty
Just pinfo -> text "language" <+> ppCId lang $$ ppPMCFG pinfo
prAbs :: Abstr -> Doc
prAbs a = prAll prCat (cats a) $$ prAll prFun (funs a)
prCat :: CId -> [Hypo] -> Doc
prCat c h | isLiteralCat c = empty
| otherwise = text "cat" <+> ppCId c
prFun :: CId -> (Type,Int,[Equation]) -> Doc
prFun f (t,_,_) = text "fun" <+> ppCId f <+> text ":" <+> prType t
prType :: Type -> Doc
prType t = parens (hsep (punctuate (text ",") (map ppCId cs))) <+> text "->" <+> ppCId c
where (cs,c) = catSkeleton t
-- FIXME: show concrete name
-- FIXME: inline opers first
prCnc :: Abstr -> CId -> Concr -> Doc
prCnc abstr name c = prAll prLinCat (lincats c) $$ prAll prLin (lins (expand c))
where
prLinCat :: CId -> Term -> Doc
prLinCat c t | isLiteralCat c = empty
| otherwise = text "lincat" <+> ppCId c <+> text "=" <+> pr 0 t
where
pr p (R ts) = prec p 1 (hsep (punctuate (text " *") (map (pr 1) ts)))
pr _ (S []) = text "Str"
pr _ (C n) = text "Int_" <> text (show (n+1))
prLin :: CId -> Term -> Doc
prLin f t = text "lin" <+> ppCId f <+> text "=" <+> pr 0 t
where
pr :: Int -> Term -> Doc
pr p (R ts) = text "<" <+> hsep (punctuate (text ",") (map (pr 0) ts)) <+> text ">"
pr p (P t1 t2) = prec p 3 (pr 3 t1 <> text "!" <> pr 3 t2)
pr p (S ts) = prec p 2 (hsep (punctuate (text " ++") (map (pr 2) ts)))
pr p (K (KS t)) = doubleQuotes (text t)
pr p (K _) = empty
pr p (V i) = text ("argv_" ++ show (i+1))
pr p (C i) = text (show (i+1))
pr p (FV ts) = prec p 1 (hsep (punctuate (text " |") (map (pr 1) ts)))
pr _ t = error $ "PGFPretty.prLin " ++ show t
linCat :: Concr -> CId -> Term
linCat cnc c = Map.findWithDefault (error $ "lincat: " ++ showCId c) c (lincats cnc)
prec :: Int -> Int -> Doc -> Doc
prec p m | p >= m = parens
| otherwise = id
expand :: Concr -> Concr
expand cnc = cnc { lins = Map.map (f "") (lins cnc) }
where
-- FIXME: handle KP
f :: String -> Term -> Term
f w (R ts) = R (map (f w) ts)
f w (P t1 t2) = P (f w t1) (f w t2)
f w (S []) = S (if null w then [] else [K (KS w)])
f w (S (t:ts)) = S (f w t : map (f "") ts)
f w (FV ts) = FV (map (f w) ts)
f w (W s t) = f (w++s) t
f w (K (KS t)) = K (KS (w++t))
f w (F o) = f w (Map.findWithDefault (error $ "Bad oper: " ++ showCId o) o (opers cnc))
f w t = t
-- Utilities
prAll :: (a -> b -> Doc) -> Map a b -> Doc
prAll p m = vcat [ p k v | (k,v) <- Map.toList m]

View File

@@ -29,7 +29,7 @@ pgf2js pgf =
start = showCId $ M.lookStartCat pgf start = showCId $ M.lookStartCat pgf
grammar = new "GFGrammar" [js_abstract, js_concrete] grammar = new "GFGrammar" [js_abstract, js_concrete]
js_abstract = abstract2js start as js_abstract = abstract2js start as
js_concrete = JS.EObj $ map (concrete2js n) cs js_concrete = JS.EObj $ map concrete2js cs
abstract2js :: String -> Abstr -> JS.Expr abstract2js :: String -> Abstr -> JS.Expr
abstract2js start ds = new "GFAbstract" [JS.EStr start, JS.EObj $ map absdef2js (Map.assocs (funs ds))] abstract2js start ds = new "GFAbstract" [JS.EStr start, JS.EObj $ map absdef2js (Map.assocs (funs ds))]
@@ -39,18 +39,21 @@ absdef2js (f,(typ,_,_)) =
let (args,cat) = M.catSkeleton typ in let (args,cat) = M.catSkeleton typ in
JS.Prop (JS.IdentPropName (JS.Ident (showCId f))) (new "Type" [JS.EArray [JS.EStr (showCId x) | x <- args], JS.EStr (showCId cat)]) JS.Prop (JS.IdentPropName (JS.Ident (showCId f))) (new "Type" [JS.EArray [JS.EStr (showCId x) | x <- args], JS.EStr (showCId cat)])
concrete2js :: String -> (CId,Concr) -> JS.Property concrete2js :: (CId,Concr) -> JS.Property
concrete2js n (c, cnc) = concrete2js (c,cnc) =
JS.Prop l (new "GFConcrete" ([flags,(JS.EObj $ ((map (cncdef2js n (showCId c)) ds) ++ litslins))] ++ JS.Prop l (new "GFConcrete" [mapToJSObj JS.EStr $ cflags cnc,
maybe [] parser2js (parser cnc))) JS.EObj $ [JS.Prop (JS.IntPropName cat) (JS.EArray (map frule2js (Set.toList set))) | (cat,set) <- IntMap.toList (productions cnc)],
JS.EArray $ (map ffun2js (Array.elems (functions cnc))),
JS.EArray $ (map seq2js (Array.elems (sequences cnc))),
JS.EObj $ map cats (Map.assocs (startCats cnc)),
JS.EInt (totalCats cnc)])
where where
flags = mapToJSObj JS.EStr $ cflags cnc
l = JS.IdentPropName (JS.Ident (showCId c)) l = JS.IdentPropName (JS.Ident (showCId c))
ds = concatMap Map.assocs [lins cnc, opers cnc, lindefs cnc]
litslins = [JS.Prop (JS.StringPropName "Int") (JS.EFun [children] [JS.SReturn $ new "Arr" [JS.EIndex (JS.EVar children) (JS.EInt 0)]]), litslins = [JS.Prop (JS.StringPropName "Int") (JS.EFun [children] [JS.SReturn $ new "Arr" [JS.EIndex (JS.EVar children) (JS.EInt 0)]]),
JS.Prop (JS.StringPropName "Float") (JS.EFun [children] [JS.SReturn $ new "Arr" [JS.EIndex (JS.EVar children) (JS.EInt 0)]]), JS.Prop (JS.StringPropName "Float") (JS.EFun [children] [JS.SReturn $ new "Arr" [JS.EIndex (JS.EVar children) (JS.EInt 0)]]),
JS.Prop (JS.StringPropName "String") (JS.EFun [children] [JS.SReturn $ new "Arr" [JS.EIndex (JS.EVar children) (JS.EInt 0)]])] JS.Prop (JS.StringPropName "String") (JS.EFun [children] [JS.SReturn $ new "Arr" [JS.EIndex (JS.EVar children) (JS.EInt 0)]])]
cats (c,(start,end,_)) = JS.Prop (JS.IdentPropName (JS.Ident (showCId c))) (JS.EObj [JS.Prop (JS.IdentPropName (JS.Ident "s")) (JS.EInt start)
,JS.Prop (JS.IdentPropName (JS.Ident "e")) (JS.EInt end)])
cncdef2js :: String -> String -> (CId,Term) -> JS.Property cncdef2js :: String -> String -> (CId,Term) -> JS.Property
cncdef2js n l (f, t) = JS.Prop (JS.IdentPropName (JS.Ident (showCId f))) (JS.EFun [children] [JS.SReturn (term2js n l t)]) cncdef2js n l (f, t) = JS.Prop (JS.IdentPropName (JS.Ident (showCId f))) (JS.EFun [children] [JS.SReturn (term2js n l t)])
@@ -88,17 +91,6 @@ argIdent n = JS.Ident ("x" ++ show n)
children :: JS.Ident children :: JS.Ident
children = JS.Ident "cs" children = JS.Ident "cs"
-- Parser
parser2js :: ParserInfo -> [JS.Expr]
parser2js p = [new "Parser" [JS.EObj $ [JS.Prop (JS.IntPropName cat) (JS.EArray (map frule2js (Set.toList set))) | (cat,set) <- IntMap.toList (productions p)],
JS.EArray $ (map ffun2js (Array.elems (functions p))),
JS.EArray $ (map seq2js (Array.elems (sequences p))),
JS.EObj $ map cats (Map.assocs (startCats p)),
JS.EInt (totalCats p)]]
where
cats (c,(start,end,_)) = JS.Prop (JS.IdentPropName (JS.Ident (showCId c))) (JS.EObj [JS.Prop (JS.IdentPropName (JS.Ident "s")) (JS.EInt start)
,JS.Prop (JS.IdentPropName (JS.Ident "e")) (JS.EInt end)])
frule2js :: Production -> JS.Expr frule2js :: Production -> JS.Expr
frule2js (FApply funid args) = new "Rule" [JS.EInt funid, JS.EArray (map JS.EInt args)] frule2js (FApply funid args) = new "Rule" [JS.EInt funid, JS.EArray (map JS.EInt args)]
frule2js (FCoerce arg) = new "Coerce" [JS.EInt arg] frule2js (FCoerce arg) = new "Coerce" [JS.EInt arg]

View File

@@ -88,20 +88,11 @@ plFundef (fun, (_,_,eqs)) = [plFact "def" [plp fun, plp fundef']]
-- concrete syntax -- concrete syntax
plConcrete :: (CId, Concr) -> [String] plConcrete :: (CId, Concr) -> [String]
plConcrete (cncname, Concr cflags lins opers lincats lindefs plConcrete (cncname, cnc) =
_printnames _paramlincats _parser) =
["", "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%", ["", "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%",
"%% concrete module: " ++ plp cncname] ++ "%% concrete module: " ++ plp cncname] ++
clauseHeader "%% cncflag(?Flag, ?Value): flags for concrete syntax" clauseHeader "%% cncflag(?Flag, ?Value): flags for concrete syntax"
(map (mod . plpFact2 "cncflag") (Map.assocs cflags)) ++ (map (mod . plpFact2 "cncflag") (Map.assocs (cflags cnc)))
clauseHeader "%% lincat(?Cat, ?Linearization type)"
(map (mod . plpFact2 "lincat") (Map.assocs lincats)) ++
clauseHeader "%% lindef(?Cat, ?Linearization default)"
(map (mod . plpFact2 "lindef") (Map.assocs lindefs)) ++
clauseHeader "%% lin(?Fun, ?Linearization)"
(map (mod . plpFact2 "lin") (Map.assocs lins)) ++
clauseHeader "%% oper(?Oper, ?Linearization)"
(map (mod . plpFact2 "oper") (Map.assocs opers))
where mod clause = plp cncname ++ ": " ++ clause where mod clause = plp cncname ++ ": " ++ clause

View File

@@ -5,7 +5,7 @@ module GF.Infra.Option
Flags(..), Flags(..),
Mode(..), Phase(..), Verbosity(..), Encoding(..), OutputFormat(..), Mode(..), Phase(..), Verbosity(..), Encoding(..), OutputFormat(..),
SISRFormat(..), Optimization(..), CFGTransform(..), HaskellOption(..), SISRFormat(..), Optimization(..), CFGTransform(..), HaskellOption(..),
Dump(..), Printer(..), Recomp(..), BuildParser(..), Dump(..), Printer(..), Recomp(..),
-- * Option parsing -- * Option parsing
parseOptions, parseModuleOptions, fixRelativeLibPaths, parseOptions, parseModuleOptions, fixRelativeLibPaths,
-- * Option pretty-printing -- * Option pretty-printing
@@ -81,7 +81,6 @@ data Encoding = UTF_8 | ISO_8859_1 | CP_1250 | CP_1251 | CP_1252
deriving (Eq,Ord) deriving (Eq,Ord)
data OutputFormat = FmtPGFPretty data OutputFormat = FmtPGFPretty
| FmtPMCFGPretty
| FmtJavaScript | FmtJavaScript
| FmtHaskell | FmtHaskell
| FmtProlog | FmtProlog
@@ -137,9 +136,6 @@ data Printer = PrinterStrip -- ^ Remove name qualifiers.
data Recomp = AlwaysRecomp | RecompIfNewer | NeverRecomp data Recomp = AlwaysRecomp | RecompIfNewer | NeverRecomp
deriving (Show,Eq,Ord) deriving (Show,Eq,Ord)
data BuildParser = BuildParser | DontBuildParser | BuildParserOnDemand
deriving (Show,Eq,Ord)
data Flags = Flags { data Flags = Flags {
optMode :: Mode, optMode :: Mode,
optStopAfterPhase :: Phase, optStopAfterPhase :: Phase,
@@ -172,7 +168,6 @@ data Flags = Flags {
optSpeechLanguage :: Maybe String, optSpeechLanguage :: Maybe String,
optLexer :: Maybe String, optLexer :: Maybe String,
optUnlexer :: Maybe String, optUnlexer :: Maybe String,
optBuildParser :: BuildParser,
optWarnings :: [Warning], optWarnings :: [Warning],
optDump :: [Dump] optDump :: [Dump]
} }
@@ -218,7 +213,6 @@ optionsPGF :: Options -> [(String,String)]
optionsPGF opts = optionsPGF opts =
maybe [] (\x -> [("language",x)]) (flag optSpeechLanguage opts) maybe [] (\x -> [("language",x)]) (flag optSpeechLanguage opts)
++ maybe [] (\x -> [("startcat",x)]) (flag optStartCat opts) ++ maybe [] (\x -> [("startcat",x)]) (flag optStartCat opts)
++ (if flag optBuildParser opts == BuildParserOnDemand then [("parser","ondemand")] else [])
-- Option manipulation -- Option manipulation
@@ -274,7 +268,6 @@ defaultFlags = Flags {
optSpeechLanguage = Nothing, optSpeechLanguage = Nothing,
optLexer = Nothing, optLexer = Nothing,
optUnlexer = Nothing, optUnlexer = Nothing,
optBuildParser = BuildParser,
optWarnings = [], optWarnings = [],
optDump = [] optDump = []
} }
@@ -351,7 +344,6 @@ optDescr =
Option [] ["coding"] (ReqArg coding "ENCODING") Option [] ["coding"] (ReqArg coding "ENCODING")
("Character encoding of the source grammar, ENCODING = " ("Character encoding of the source grammar, ENCODING = "
++ concat (intersperse " | " (map fst encodings)) ++ "."), ++ concat (intersperse " | " (map fst encodings)) ++ "."),
Option [] ["parser"] (ReqArg buildParser "VALUE") "Build parser (default on). VALUE = on | off | ondemand",
Option [] ["startcat"] (ReqArg startcat "CAT") "Grammar start category.", Option [] ["startcat"] (ReqArg startcat "CAT") "Grammar start category.",
Option [] ["language"] (ReqArg language "LANG") "Set the speech language flag to LANG in the generated grammar.", Option [] ["language"] (ReqArg language "LANG") "Set the speech language flag to LANG in the generated grammar.",
Option [] ["lexer"] (ReqArg lexer "LEXER") "Use lexer LEXER.", Option [] ["lexer"] (ReqArg lexer "LEXER") "Use lexer LEXER.",
@@ -410,11 +402,6 @@ optDescr =
coding x = case lookup x encodings of coding x = case lookup x encodings of
Just c -> set $ \o -> o { optEncoding = c } Just c -> set $ \o -> o { optEncoding = c }
Nothing -> fail $ "Unknown character encoding: " ++ x Nothing -> fail $ "Unknown character encoding: " ++ x
buildParser x = do v <- case x of
"on" -> return BuildParser
"off" -> return DontBuildParser
"ondemand" -> return BuildParserOnDemand
set $ \o -> o { optBuildParser = v }
startcat x = set $ \o -> o { optStartCat = Just x } startcat x = set $ \o -> o { optStartCat = Just x }
language x = set $ \o -> o { optSpeechLanguage = Just x } language x = set $ \o -> o { optSpeechLanguage = Just x }
lexer x = set $ \o -> o { optLexer = Just x } lexer x = set $ \o -> o { optLexer = Just x }
@@ -441,7 +428,6 @@ optDescr =
outputFormats :: [(String,OutputFormat)] outputFormats :: [(String,OutputFormat)]
outputFormats = outputFormats =
[("pgf_pretty", FmtPGFPretty), [("pgf_pretty", FmtPGFPretty),
("pmcfg_pretty", FmtPMCFGPretty),
("js", FmtJavaScript), ("js", FmtJavaScript),
("haskell", FmtHaskell), ("haskell", FmtHaskell),
("prolog", FmtProlog), ("prolog", FmtProlog),

View File

@@ -34,15 +34,15 @@ pgfToCFG :: PGF
-> CFG -> CFG
pgfToCFG pgf lang = mkCFG (showCId (lookStartCat pgf)) extCats (startRules ++ concatMap fruleToCFRule rules) pgfToCFG pgf lang = mkCFG (showCId (lookStartCat pgf)) extCats (startRules ++ concatMap fruleToCFRule rules)
where where
pinfo = fromMaybe (error "pgfToCFG: No parser.") (lookParser pgf lang) cnc = lookConcr pgf lang
rules :: [(FCat,Production)] rules :: [(FCat,Production)]
rules = [(fcat,prod) | (fcat,set) <- IntMap.toList (PGF.pproductions pinfo) rules = [(fcat,prod) | (fcat,set) <- IntMap.toList (PGF.pproductions cnc)
, prod <- Set.toList set] , prod <- Set.toList set]
fcatCats :: Map FCat Cat fcatCats :: Map FCat Cat
fcatCats = Map.fromList [(fc, showCId c ++ "_" ++ show i) fcatCats = Map.fromList [(fc, showCId c ++ "_" ++ show i)
| (c,(s,e,lbls)) <- Map.toList (startCats pinfo), | (c,(s,e,lbls)) <- Map.toList (startCats cnc),
(fc,i) <- zip (range (s,e)) [1..]] (fc,i) <- zip (range (s,e)) [1..]]
fcatCat :: FCat -> Cat fcatCat :: FCat -> Cat
@@ -58,9 +58,9 @@ pgfToCFG pgf lang = mkCFG (showCId (lookStartCat pgf)) extCats (startRules ++ co
topdownRules cat = f cat [] topdownRules cat = f cat []
where where
f cat rules = maybe rules (Set.fold g rules) (IntMap.lookup cat (pproductions pinfo)) f cat rules = maybe rules (Set.fold g rules) (IntMap.lookup cat (pproductions cnc))
g (FApply funid args) rules = (functions pinfo ! funid,args) : rules g (FApply funid args) rules = (functions cnc ! funid,args) : rules
g (FCoerce cat) rules = f cat rules g (FCoerce cat) rules = f cat rules
@@ -69,7 +69,7 @@ pgfToCFG pgf lang = mkCFG (showCId (lookStartCat pgf)) extCats (startRules ++ co
startRules :: [CFRule] startRules :: [CFRule]
startRules = [CFRule (showCId c) [NonTerminal (fcatToCat fc r)] (CFRes 0) startRules = [CFRule (showCId c) [NonTerminal (fcatToCat fc r)] (CFRes 0)
| (c,(s,e,lbls)) <- Map.toList (startCats pinfo), | (c,(s,e,lbls)) <- Map.toList (startCats cnc),
fc <- range (s,e), not (isLiteralFCat fc), fc <- range (s,e), not (isLiteralFCat fc),
r <- [0..catLinArity fc-1]] r <- [0..catLinArity fc-1]]
@@ -77,10 +77,10 @@ pgfToCFG pgf lang = mkCFG (showCId (lookStartCat pgf)) extCats (startRules ++ co
fruleToCFRule (c,FApply funid args) = fruleToCFRule (c,FApply funid args) =
[CFRule (fcatToCat c l) (mkRhs row) (profilesToTerm [fixProfile row n | n <- [0..length args-1]]) [CFRule (fcatToCat c l) (mkRhs row) (profilesToTerm [fixProfile row n | n <- [0..length args-1]])
| (l,seqid) <- Array.assocs rhs | (l,seqid) <- Array.assocs rhs
, let row = sequences pinfo ! seqid , let row = sequences cnc ! seqid
, not (containsLiterals row)] , not (containsLiterals row)]
where where
FFun f rhs = functions pinfo ! funid FFun f rhs = functions cnc ! funid
mkRhs :: Array FPointPos FSymbol -> [CFSymbol] mkRhs :: Array FPointPos FSymbol -> [CFSymbol]
mkRhs = concatMap fsymbolToSymbol . Array.elems mkRhs = concatMap fsymbolToSymbol . Array.elems

View File

@@ -54,7 +54,7 @@ module PGF(
showPrintName, showPrintName,
-- ** Parsing -- ** Parsing
parse, parseWithRecovery, canParse, parseAllLang, parseAll, parse, parseWithRecovery, parseAllLang, parseAll,
-- ** Evaluation -- ** Evaluation
PGF.compute, paraphrase, PGF.compute, paraphrase,
@@ -106,9 +106,7 @@ import PGF.Morphology
import PGF.Data hiding (functions) import PGF.Data hiding (functions)
import PGF.Binary import PGF.Binary
import qualified PGF.Parse as Parse import qualified PGF.Parse as Parse
import qualified GF.Compile.GeneratePMCFG as PMCFG
import GF.Infra.Option
import GF.Data.Utilities (replace) import GF.Data.Utilities (replace)
import Data.Char import Data.Char
@@ -144,9 +142,6 @@ parse :: PGF -> Language -> Type -> String -> [Tree]
parseWithRecovery :: PGF -> Language -> Type -> [Type] -> String -> [Tree] parseWithRecovery :: PGF -> Language -> Type -> [Type] -> String -> [Tree]
-- | Checks whether the given language can be used for parsing.
canParse :: PGF -> Language -> Bool
-- | The same as 'linearizeAllLang' but does not return -- | The same as 'linearizeAllLang' but does not return
-- the language. -- the language.
linearizeAll :: PGF -> Tree -> [String] linearizeAll :: PGF -> Tree -> [String]
@@ -228,31 +223,17 @@ complete :: PGF -> Language -> Type -> String
-- Implementation -- Implementation
--------------------------------------------------- ---------------------------------------------------
readPGF f = decodeFile f >>= addParsers readPGF f = decodeFile f
-- Adds parsers for all concretes that don't have a parser and that have parser=ondemand.
addParsers :: PGF -> IO PGF
addParsers pgf = do cncs <- sequence [if wantsParser cnc then addParser lang cnc else return (lang,cnc)
| (lang,cnc) <- Map.toList (concretes pgf)]
return pgf { concretes = Map.fromList cncs }
where
wantsParser cnc = isNothing (parser cnc) && Map.lookup (mkCId "parser") (cflags cnc) == Just "ondemand"
addParser lang cnc = do pinfo <- PMCFG.convertConcrete noOptions (abstract pgf) lang cnc
return (lang,cnc { parser = Just pinfo })
linearize pgf lang = concat . take 1 . PGF.Linearize.linearizes pgf lang linearize pgf lang = concat . take 1 . PGF.Linearize.linearizes pgf lang
parse pgf lang typ s = parse pgf lang typ s =
case Map.lookup lang (concretes pgf) of case Map.lookup lang (concretes pgf) of
Just cnc -> case parser cnc of Just cnc -> Parse.parse pgf lang typ (words s)
Just pinfo -> Parse.parse pgf lang typ (words s)
Nothing -> error ("No parser built for language: " ++ showCId lang)
Nothing -> error ("Unknown language: " ++ showCId lang) Nothing -> error ("Unknown language: " ++ showCId lang)
parseWithRecovery pgf lang typ open_typs s = Parse.parseWithRecovery pgf lang typ open_typs (words s) parseWithRecovery pgf lang typ open_typs s = Parse.parseWithRecovery pgf lang typ open_typs (words s)
canParse pgf cnc = isJust (lookParser pgf cnc)
linearizeAll mgr = map snd . linearizeAllLang mgr linearizeAll mgr = map snd . linearizeAllLang mgr
linearizeAllLang mgr t = linearizeAllLang mgr t =
[(lang,PGF.linearize mgr lang t) | lang <- languages mgr] [(lang,PGF.linearize mgr lang t) | lang <- languages mgr]
@@ -260,7 +241,7 @@ linearizeAllLang mgr t =
parseAll mgr typ = map snd . parseAllLang mgr typ parseAll mgr typ = map snd . parseAllLang mgr typ
parseAllLang mgr typ s = parseAllLang mgr typ s =
[(lang,ts) | lang <- languages mgr, canParse mgr lang, let ts = parse mgr lang typ s, not (null ts)] [(lang,ts) | lang <- languages mgr, let ts = parse mgr lang typ s, not (null ts)]
generateRandom pgf cat = do generateRandom pgf cat = do
gen <- newStdGen gen <- newStdGen

View File

@@ -51,24 +51,24 @@ instance Binary Abstr where
}) })
instance Binary Concr where instance Binary Concr where
put cnc = put ( cflags cnc, lins cnc, opers cnc put cnc = put ( cflags cnc, printnames cnc
, lincats cnc, lindefs cnc , functions cnc, sequences cnc
, printnames cnc, paramlincats cnc , productions cnc
, parser cnc , totalCats cnc, startCats cnc
) )
get = do cflags <- get get = do cflags <- get
lins <- get printnames <- get
opers <- get functions <- get
lincats <- get sequences <- get
lindefs <- get productions <- get
printnames <- get totalCats <- get
paramlincats <- get startCats <- get
parser <- get return (Concr{ cflags=cflags, printnames=printnames
return (Concr{ cflags=cflags, lins=lins, opers=opers , functions=functions,sequences=sequences
, lincats=lincats, lindefs=lindefs , productions = productions
, printnames=printnames , pproductions = IntMap.empty
, paramlincats=paramlincats , lproductions = Map.empty
, parser=parser , totalCats=totalCats,startCats=startCats
}) })
instance Binary Alternative where instance Binary Alternative where
@@ -186,17 +186,4 @@ instance Binary Production where
1 -> liftM FCoerce get 1 -> liftM FCoerce get
_ -> decodingError _ -> decodingError
instance Binary ParserInfo where
put p = put (functions p, sequences p, productions p, totalCats p, startCats p)
get = do functions <- get
sequences <- get
productions <- get
totalCats <- get
startCats <- get
return (ParserInfo{functions=functions,sequences=sequences
,productions = productions
,pproductions = IntMap.empty
,lproductions = Map.empty
,totalCats=totalCats,startCats=startCats})
decodingError = fail "This PGF file was compiled with different version of GF" decodingError = fail "This PGF file was compiled with different version of GF"

View File

@@ -1,4 +1,4 @@
module PGF.Check (checkPGF) where module PGF.Check (checkPGF,checkLin) where
import PGF.CId import PGF.CId
import PGF.Data import PGF.Data
@@ -7,14 +7,15 @@ import GF.Data.ErrM
import qualified Data.Map as Map import qualified Data.Map as Map
import Control.Monad import Control.Monad
import Data.Maybe(fromMaybe)
import Debug.Trace import Debug.Trace
checkPGF :: PGF -> Err (PGF,Bool) checkPGF :: PGF -> Err (PGF,Bool)
checkPGF pgf = do checkPGF pgf = return (pgf,True) {- do
(cs,bs) <- mapM (checkConcrete pgf) (cs,bs) <- mapM (checkConcrete pgf)
(Map.assocs (concretes pgf)) >>= return . unzip (Map.assocs (concretes pgf)) >>= return . unzip
return (pgf {concretes = Map.fromAscList cs}, and bs) return (pgf {concretes = Map.fromAscList cs}, and bs)
-}
-- errors are non-fatal; replace with 'fail' to change this -- errors are non-fatal; replace with 'fail' to change this
msg s = trace s (return ()) msg s = trace s (return ())
@@ -27,7 +28,7 @@ labelBoolErr ms iob = do
(x,b) <- iob (x,b) <- iob
if b then return (x,b) else (msg ms >> return (x,b)) if b then return (x,b) else (msg ms >> return (x,b))
{-
checkConcrete :: PGF -> (CId,Concr) -> Err ((CId,Concr),Bool) checkConcrete :: PGF -> (CId,Concr) -> Err ((CId,Concr),Bool)
checkConcrete pgf (lang,cnc) = checkConcrete pgf (lang,cnc) =
labelBoolErr ("happened in language " ++ showCId lang) $ do labelBoolErr ("happened in language " ++ showCId lang) $ do
@@ -35,8 +36,11 @@ checkConcrete pgf (lang,cnc) =
return ((lang,cnc{lins = Map.fromAscList rs}),and bs) return ((lang,cnc{lins = Map.fromAscList rs}),and bs)
where where
checkl = checkLin pgf lang checkl = checkLin pgf lang
-}
checkLin :: PGF -> CId -> (CId,Term) -> Err ((CId,Term),Bool) type PGFSig = (Map.Map CId (Type,Int,[Equation]),Map.Map CId Term,Map.Map CId Term)
checkLin :: PGFSig -> CId -> (CId,Term) -> Err ((CId,Term),Bool)
checkLin pgf lang (f,t) = checkLin pgf lang (f,t) =
labelBoolErr ("happened in function " ++ showCId f) $ do labelBoolErr ("happened in function " ++ showCId f) $ do
(t',b) <- checkTerm (lintype pgf lang f) t --- $ inline pgf lang t (t',b) <- checkTerm (lintype pgf lang f) t --- $ inline pgf lang t
@@ -124,8 +128,8 @@ ints = C
str :: CType str :: CType
str = S [] str = S []
lintype :: PGF -> CId -> CId -> LinType lintype :: PGFSig -> CId -> CId -> LinType
lintype pgf lang fun = case typeSkeleton (lookType pgf fun) of lintype pgf lang fun = case typeSkeleton (lookFun pgf fun) of
(cs,c) -> (map vlinc cs, linc c) ---- HOAS (cs,c) -> (map vlinc cs, linc c) ---- HOAS
where where
linc = lookLincat pgf lang linc = lookLincat pgf lang
@@ -133,7 +137,7 @@ lintype pgf lang fun = case typeSkeleton (lookType pgf fun) of
vlinc (i,c) = case linc c of vlinc (i,c) = case linc c of
R ts -> R (ts ++ replicate i str) R ts -> R (ts ++ replicate i str)
inline :: PGF -> CId -> Term -> Term inline :: PGFSig -> CId -> Term -> Term
inline pgf lang t = case t of inline pgf lang t = case t of
F c -> inl $ look c F c -> inl $ look c
_ -> composSafeOp inl t _ -> composSafeOp inl t
@@ -171,3 +175,7 @@ err :: (String -> b) -> (a -> b) -> Err a -> b
err d f e = case e of err d f e = case e of
Ok a -> f a Ok a -> f a
Bad s -> d s Bad s -> d s
lookFun (abs,lin,lincats) f = (\(a,b,c) -> a) $ fromMaybe (error "No abs") (Map.lookup f abs)
lookLincat (abs,lin,lincats) _ c = fromMaybe (error "No lincat") (Map.lookup c lincats)
lookLin (abs,lin,lincats) _ f = fromMaybe (error "No lin") (Map.lookup f lin)

View File

@@ -1,15 +1,17 @@
module PGF.Data (module PGF.Data, module PGF.Expr, module PGF.Type, module PGF.PMCFG) where module PGF.Data (module PGF.Data, module PGF.Expr, module PGF.Type) where
import PGF.CId import PGF.CId
import PGF.Expr hiding (Value, Env, Tree) import PGF.Expr hiding (Value, Env, Tree)
import PGF.Type import PGF.Type
import PGF.PMCFG
import qualified Data.Map as Map import qualified Data.Map as Map
import qualified Data.Set as Set import qualified Data.Set as Set
import qualified Data.IntMap as IntMap import qualified Data.IntMap as IntMap
import Data.Array.IArray
import Data.Array.Unboxed
import Data.List import Data.List
-- internal datatypes for PGF -- internal datatypes for PGF
-- | An abstract data type representing multilingual grammar -- | An abstract data type representing multilingual grammar
@@ -30,16 +32,40 @@ data Abstr = Abstr {
} }
data Concr = Concr { data Concr = Concr {
cflags :: Map.Map CId String, -- value of a flag cflags :: Map.Map CId String, -- value of a flag
lins :: Map.Map CId Term, -- lin of a fun printnames :: Map.Map CId String, -- printname of a cat or a fun
opers :: Map.Map CId Term, -- oper generated by subex elim functions :: Array FunId FFun,
lincats :: Map.Map CId Term, -- lin type of a cat sequences :: Array SeqId FSeq,
lindefs :: Map.Map CId Term, -- lin default of a cat productions :: IntMap.IntMap (Set.Set Production), -- the original productions loaded from the PGF file
printnames :: Map.Map CId String, -- printname of a cat or a fun pproductions :: IntMap.IntMap (Set.Set Production), -- productions needed for parsing
paramlincats :: Map.Map CId Term, -- lin type of cat, with printable param names lproductions :: Map.Map CId (IntMap.IntMap (Set.Set Production)), -- productions needed for linearization
parser :: Maybe ParserInfo -- parser startCats :: Map.Map CId (FCat,FCat,Array FIndex String), -- for every category - start/end FCat and a list of label names
totalCats :: {-# UNPACK #-} !FCat
} }
type FCat = Int
type FIndex = Int
type FPointPos = Int
data FSymbol
= FSymCat {-# UNPACK #-} !Int {-# UNPACK #-} !FIndex
| FSymLit {-# UNPACK #-} !Int {-# UNPACK #-} !FIndex
| FSymKS [String]
| FSymKP [String] [Alternative]
deriving (Eq,Ord,Show)
data Production
= FApply {-# UNPACK #-} !FunId [FCat]
| FCoerce {-# UNPACK #-} !FCat
| FConst Expr [String]
deriving (Eq,Ord,Show)
data FFun = FFun CId {-# UNPACK #-} !(UArray FIndex SeqId) deriving (Eq,Ord,Show)
type FSeq = Array FPointPos FSymbol
type FunId = Int
type SeqId = Int
data Alternative =
Alt [String] [String]
deriving (Eq,Ord,Show)
data Term = data Term =
R [Term] R [Term]
| P Term Term | P Term Term
@@ -59,7 +85,7 @@ data Tokn =
deriving (Eq,Ord,Show) deriving (Eq,Ord,Show)
-- merge two GFCCs; fails is differens absnames; priority to second arg -- merge two PGFs; fails is differens absnames; priority to second arg
unionPGF :: PGF -> PGF -> PGF unionPGF :: PGF -> PGF -> PGF
unionPGF one two = case absname one of unionPGF one two = case absname one of
@@ -93,3 +119,12 @@ readLanguage = readCId
showLanguage :: Language -> String showLanguage :: Language -> String
showLanguage = showCId showLanguage = showCId
fcatString, fcatInt, fcatFloat, fcatVar :: Int
fcatString = (-1)
fcatInt = (-2)
fcatFloat = (-3)
fcatVar = (-4)
isLiteralFCat :: FCat -> Bool
isLiteralFCat = (`elem` [fcatString, fcatInt, fcatFloat, fcatVar])

View File

@@ -3,7 +3,6 @@ module PGF.Linearize(linearizes,markLinearizes,tabularLinearizes) where
import PGF.CId import PGF.CId
import PGF.Data import PGF.Data
import PGF.Macros import PGF.Macros
import Data.Maybe (fromJust)
import Data.Array.IArray import Data.Array.IArray
import Data.List import Data.List
import Control.Monad import Control.Monad
@@ -22,8 +21,7 @@ linTree :: PGF -> Language -> (Maybe CId -> [Int] -> LinTable -> LinTable) -> Ex
linTree pgf lang mark e = lin0 [] [] [] Nothing e linTree pgf lang mark e = lin0 [] [] [] Nothing e
where where
cnc = lookMap (error "no lang") lang (concretes pgf) cnc = lookMap (error "no lang") lang (concretes pgf)
pinfo = fromJust (parser cnc) lp = lproductions cnc
lp = lproductions pinfo
lin0 path xs ys mb_fid (EAbs _ x e) = lin0 path (showCId x:xs) ys mb_fid e lin0 path xs ys mb_fid (EAbs _ x e) = lin0 path (showCId x:xs) ys mb_fid e
lin0 path xs ys mb_fid (ETyped e _) = lin0 path xs ys mb_fid e lin0 path xs ys mb_fid (ETyped e _) = lin0 path xs ys mb_fid e
@@ -50,7 +48,7 @@ linTree pgf lang mark e = lin0 [] [] [] Nothing e
case prod of case prod of
FApply funid fids -> do guard (length fids == length es) FApply funid fids -> do guard (length fids == length es)
args <- sequence (zipWith3 (\i fid e -> lin0 (sub i path) [] xs (Just fid) e) [0..] fids es) args <- sequence (zipWith3 (\i fid e -> lin0 (sub i path) [] xs (Just fid) e) [0..] fids es)
let (FFun _ lins) = functions pinfo ! funid let (FFun _ lins) = functions cnc ! funid
return (listArray (bounds lins) [computeSeq seqid args | seqid <- elems lins]) return (listArray (bounds lins) [computeSeq seqid args | seqid <- elems lins])
FCoerce fid -> apply path xs (Just fid) f es FCoerce fid -> apply path xs (Just fid) f es
Nothing -> mzero Nothing -> mzero
@@ -70,7 +68,7 @@ linTree pgf lang mark e = lin0 [] [] [] Nothing e
computeSeq seqid args = concatMap compute (elems seq) computeSeq seqid args = concatMap compute (elems seq)
where where
seq = sequences pinfo ! seqid seq = sequences cnc ! seqid
compute (FSymCat d r) = (args !! d) ! r compute (FSymCat d r) = (args !! d) ! r
compute (FSymLit d r) = (args !! d) ! r compute (FSymLit d r) = (args !! d) ! r
@@ -94,7 +92,7 @@ tabularLinearizes pgf lang e = map (zip lbls . map (unwords . untokn) . elems) (
where where
lbls = case unApp e of lbls = case unApp e of
Just (f,_) -> let cat = valCat (lookType pgf f) Just (f,_) -> let cat = valCat (lookType pgf f)
in case parser (lookConcr pgf lang) >>= Map.lookup cat . startCats of in case Map.lookup cat (startCats (lookConcr pgf lang)) of
Just (_,_,lbls) -> elems lbls Just (_,_,lbls) -> elems lbls
Nothing -> error "No labels" Nothing -> error "No labels"
Nothing -> error "Not function application" Nothing -> error "Not function application"

View File

@@ -17,22 +17,6 @@ import GF.Data.Utilities(sortNub)
mapConcretes :: (Concr -> Concr) -> PGF -> PGF mapConcretes :: (Concr -> Concr) -> PGF -> PGF
mapConcretes f pgf = pgf { concretes = Map.map f (concretes pgf) } mapConcretes f pgf = pgf { concretes = Map.map f (concretes pgf) }
lookLin :: PGF -> CId -> CId -> Term
lookLin pgf lang fun =
lookMap tm0 fun $ lins $ lookMap (error "no lang") lang $ concretes pgf
lookOper :: PGF -> CId -> CId -> Term
lookOper pgf lang fun =
lookMap tm0 fun $ opers $ lookMap (error "no lang") lang $ concretes pgf
lookLincat :: PGF -> CId -> CId -> Term
lookLincat pgf lang fun =
lookMap tm0 fun $ lincats $ lookMap (error "no lang") lang $ concretes pgf
lookParamLincat :: PGF -> CId -> CId -> Term
lookParamLincat pgf lang fun =
lookMap tm0 fun $ paramlincats $ lookMap (error "no lang") lang $ concretes pgf
lookType :: PGF -> CId -> Type lookType :: PGF -> CId -> Type
lookType pgf f = lookType pgf f =
case lookMap (error $ "lookType " ++ show f) f (funs (abstract pgf)) of case lookMap (error $ "lookType " ++ show f) f (funs (abstract pgf)) of
@@ -52,9 +36,6 @@ isData pgf f =
lookValCat :: PGF -> CId -> CId lookValCat :: PGF -> CId -> CId
lookValCat pgf = valCat . lookType pgf lookValCat pgf = valCat . lookType pgf
lookParser :: PGF -> CId -> Maybe ParserInfo
lookParser pgf lang = Map.lookup lang (concretes pgf) >>= parser
lookStartCat :: PGF -> CId lookStartCat :: PGF -> CId
lookStartCat pgf = mkCId $ fromMaybe "S" $ msum $ Data.List.map (Map.lookup (mkCId "startcat")) lookStartCat pgf = mkCId $ fromMaybe "S" $ msum $ Data.List.map (Map.lookup (mkCId "startcat"))
[gflags pgf, aflags (abstract pgf)] [gflags pgf, aflags (abstract pgf)]
@@ -86,7 +67,7 @@ missingLins pgf lang = [c | c <- fs, not (hasl c)] where
hasl = hasLin pgf lang hasl = hasLin pgf lang
hasLin :: PGF -> CId -> CId -> Bool hasLin :: PGF -> CId -> CId -> Bool
hasLin pgf lang f = Map.member f $ lins $ lookConcr pgf lang hasLin pgf lang f = Map.member f $ lproductions $ lookConcr pgf lang
restrictPGF :: (CId -> Bool) -> PGF -> PGF restrictPGF :: (CId -> Bool) -> PGF -> PGF
restrictPGF cond pgf = pgf { restrictPGF cond pgf = pgf {
@@ -164,13 +145,11 @@ updateProductionIndices :: PGF -> PGF
updateProductionIndices pgf = pgf{concretes = fmap updateConcrete (concretes pgf)} updateProductionIndices pgf = pgf{concretes = fmap updateConcrete (concretes pgf)}
where where
updateConcrete cnc = updateConcrete cnc =
case parser cnc of let prods0 = filterProductions (productions cnc)
Nothing -> cnc p_prods = parseIndex cnc prods0
Just pinfo -> let prods0 = filterProductions (productions pinfo) l_prods = linIndex cnc prods0
p_prods = parseIndex pinfo prods0 in cnc{pproductions = p_prods, lproductions = l_prods}
l_prods = linIndex pinfo prods0
in cnc{parser = Just pinfo{pproductions = p_prods, lproductions = l_prods}}
filterProductions prods0 filterProductions prods0
| IntMap.size prods == IntMap.size prods0 = prods | IntMap.size prods == IntMap.size prods0 = prods
| otherwise = filterProductions prods | otherwise = filterProductions prods

View File

@@ -20,7 +20,7 @@ newtype Morpho = Morpho (Map.Map String [(Lemma,Analysis)])
buildMorpho :: PGF -> Language -> Morpho buildMorpho :: PGF -> Language -> Morpho
buildMorpho pgf lang = Morpho $ buildMorpho pgf lang = Morpho $
case Map.lookup lang (concretes pgf) >>= parser of case Map.lookup lang (concretes pgf) of
Just pinfo -> collectWords pinfo Just pinfo -> collectWords pinfo
Nothing -> Map.empty Nothing -> Map.empty

View File

@@ -1,101 +0,0 @@
module PGF.PMCFG where
import PGF.CId
import PGF.Expr
import qualified Data.Map as Map
import qualified Data.Set as Set
import qualified Data.IntMap as IntMap
import Data.Array.IArray
import Data.Array.Unboxed
import Text.PrettyPrint
type FCat = Int
type FIndex = Int
type FPointPos = Int
data FSymbol
= FSymCat {-# UNPACK #-} !Int {-# UNPACK #-} !FIndex
| FSymLit {-# UNPACK #-} !Int {-# UNPACK #-} !FIndex
| FSymKS [String]
| FSymKP [String] [Alternative]
deriving (Eq,Ord,Show)
data Production
= FApply {-# UNPACK #-} !FunId [FCat]
| FCoerce {-# UNPACK #-} !FCat
| FConst Expr [String]
deriving (Eq,Ord,Show)
data FFun = FFun CId {-# UNPACK #-} !(UArray FIndex SeqId) deriving (Eq,Ord,Show)
type FSeq = Array FPointPos FSymbol
type FunId = Int
type SeqId = Int
data Alternative =
Alt [String] [String]
deriving (Eq,Ord,Show)
data ParserInfo
= ParserInfo { functions :: Array FunId FFun
, sequences :: Array SeqId FSeq
, productions :: IntMap.IntMap (Set.Set Production) -- the original productions loaded from the PGF file
, pproductions :: IntMap.IntMap (Set.Set Production) -- productions needed for parsing
, lproductions :: Map.Map CId (IntMap.IntMap (Set.Set Production)) -- productions needed for linearization
, startCats :: Map.Map CId (FCat,FCat,Array FIndex String) -- for every category - start/end FCat and a list of label names
, totalCats :: {-# UNPACK #-} !FCat
}
fcatString, fcatInt, fcatFloat, fcatVar :: Int
fcatString = (-1)
fcatInt = (-2)
fcatFloat = (-3)
fcatVar = (-4)
isLiteralFCat :: FCat -> Bool
isLiteralFCat = (`elem` [fcatString, fcatInt, fcatFloat, fcatVar])
ppPMCFG :: ParserInfo -> Doc
ppPMCFG pinfo =
text "productions" $$
nest 2 (vcat [ppProduction (fcat,prod) | (fcat,set) <- IntMap.toList (productions pinfo), prod <- Set.toList set]) $$
text "functions" $$
nest 2 (vcat (map ppFun (assocs (functions pinfo)))) $$
text "sequences" $$
nest 2 (vcat (map ppSeq (assocs (sequences pinfo)))) $$
text "startcats" $$
nest 2 (vcat (map ppStartCat (Map.toList (startCats pinfo))))
ppProduction (fcat,FApply funid args) =
ppFCat fcat <+> text "->" <+> ppFunId funid <> brackets (hcat (punctuate comma (map ppFCat args)))
ppProduction (fcat,FCoerce arg) =
ppFCat fcat <+> text "->" <+> char '_' <> brackets (ppFCat arg)
ppProduction (fcat,FConst _ ss) =
ppFCat fcat <+> text "->" <+> ppStrs ss
ppFun (funid,FFun fun arr) =
ppFunId funid <+> text ":=" <+> parens (hcat (punctuate comma (map ppSeqId (elems arr)))) <+> brackets (ppCId fun)
ppSeq (seqid,seq) =
ppSeqId seqid <+> text ":=" <+> hsep (map ppSymbol (elems seq))
ppStartCat (id,(start,end,labels)) =
ppCId id <+> text ":=" <+> (text "range " <+> brackets (ppFCat start <+> text ".." <+> ppFCat end) $$
text "labels" <+> brackets (vcat (map (text . show) (elems labels))))
ppSymbol (FSymCat d r) = char '<' <> int d <> comma <> int r <> char '>'
ppSymbol (FSymLit d r) = char '<' <> int d <> comma <> int r <> char '>'
ppSymbol (FSymKS ts) = ppStrs ts
ppSymbol (FSymKP ts alts) = text "pre" <+> braces (hsep (punctuate semi (ppStrs ts : map ppAlt alts)))
ppAlt (Alt ts ps) = ppStrs ts <+> char '/' <+> hsep (map (doubleQuotes . text) ps)
ppStrs ss = doubleQuotes (hsep (map text ss))
ppFCat fcat
| fcat == fcatString = text "CString"
| fcat == fcatInt = text "CInt"
| fcat == fcatFloat = text "CFloat"
| fcat == fcatVar = text "CVar"
| otherwise = char 'C' <> int fcat
ppFunId funid = char 'F' <> int funid
ppSeqId seqid = char 'S' <> int seqid

View File

@@ -56,23 +56,20 @@ parseWithRecovery pgf lang typ open_typs toks = accept (initState pgf lang typ)
-- startup category. -- startup category.
initState :: PGF -> Language -> Type -> ParseState initState :: PGF -> Language -> Type -> ParseState
initState pgf lang (DTyp _ start _) = initState pgf lang (DTyp _ start _) =
let items = case Map.lookup start (startCats pinfo) of let items = case Map.lookup start (startCats cnc) of
Just (s,e,labels) -> do cat <- range (s,e) Just (s,e,labels) -> do cat <- range (s,e)
(funid,args) <- foldForest (\funid args -> (:) (funid,args)) (\_ _ args -> args) (funid,args) <- foldForest (\funid args -> (:) (funid,args)) (\_ _ args -> args)
[] cat (pproductions pinfo) [] cat (pproductions cnc)
let FFun fn lins = functions pinfo ! funid let FFun fn lins = functions cnc ! funid
(lbl,seqid) <- assocs lins (lbl,seqid) <- assocs lins
return (Active 0 0 funid seqid args (AK cat lbl)) return (Active 0 0 funid seqid args (AK cat lbl))
Nothing -> mzero Nothing -> mzero
pinfo = cnc = lookConcr pgf lang
case lookParser pgf lang of
Just pinfo -> pinfo
_ -> error ("Unknown language: " ++ showCId lang)
in PState pgf in PState pgf
pinfo cnc
(Chart emptyAC [] emptyPC (pproductions pinfo) (totalCats pinfo) 0) (Chart emptyAC [] emptyPC (pproductions cnc) (totalCats cnc) 0)
(TMap.singleton [] (Set.fromList items)) (TMap.singleton [] (Set.fromList items))
-- | From the current state and the next token -- | From the current state and the next token
@@ -81,19 +78,19 @@ initState pgf lang (DTyp _ start _) =
-- If the new token cannot be accepted then an error state -- If the new token cannot be accepted then an error state
-- is returned. -- is returned.
nextState :: ParseState -> String -> Either ErrorState ParseState nextState :: ParseState -> String -> Either ErrorState ParseState
nextState (PState pgf pinfo chart items) t = nextState (PState pgf cnc chart items) t =
let (mb_agenda,map_items) = TMap.decompose items let (mb_agenda,map_items) = TMap.decompose items
agenda = maybe [] Set.toList mb_agenda agenda = maybe [] Set.toList mb_agenda
acc = fromMaybe TMap.empty (Map.lookup t map_items) acc = fromMaybe TMap.empty (Map.lookup t map_items)
(acc1,chart1) = process (Just t) add (sequences pinfo) (functions pinfo) agenda acc chart (acc1,chart1) = process (Just t) add (sequences cnc) (functions cnc) agenda acc chart
chart2 = chart1{ active =emptyAC chart2 = chart1{ active =emptyAC
, actives=active chart1 : actives chart1 , actives=active chart1 : actives chart1
, passive=emptyPC , passive=emptyPC
, offset =offset chart1+1 , offset =offset chart1+1
} }
in if TMap.null acc1 in if TMap.null acc1
then Left (EState pgf pinfo chart2) then Left (EState pgf cnc chart2)
else Right (PState pgf pinfo chart2 acc1) else Right (PState pgf cnc chart2 acc1)
where where
add (tok:toks) item acc add (tok:toks) item acc
| tok == t = TMap.insertWith Set.union toks (Set.singleton item) acc | tok == t = TMap.insertWith Set.union toks (Set.singleton item) acc
@@ -104,35 +101,35 @@ nextState (PState pgf pinfo chart items) t =
-- next words and the consequent states. This is used for word completions in -- next words and the consequent states. This is used for word completions in
-- the GF interpreter. -- the GF interpreter.
getCompletions :: ParseState -> String -> Map.Map String ParseState getCompletions :: ParseState -> String -> Map.Map String ParseState
getCompletions (PState pgf pinfo chart items) w = getCompletions (PState pgf cnc chart items) w =
let (mb_agenda,map_items) = TMap.decompose items let (mb_agenda,map_items) = TMap.decompose items
agenda = maybe [] Set.toList mb_agenda agenda = maybe [] Set.toList mb_agenda
acc = Map.filterWithKey (\tok _ -> isPrefixOf w tok) map_items acc = Map.filterWithKey (\tok _ -> isPrefixOf w tok) map_items
(acc',chart1) = process Nothing add (sequences pinfo) (functions pinfo) agenda acc chart (acc',chart1) = process Nothing add (sequences cnc) (functions cnc) agenda acc chart
chart2 = chart1{ active =emptyAC chart2 = chart1{ active =emptyAC
, actives=active chart1 : actives chart1 , actives=active chart1 : actives chart1
, passive=emptyPC , passive=emptyPC
, offset =offset chart1+1 , offset =offset chart1+1
} }
in fmap (PState pgf pinfo chart2) acc' in fmap (PState pgf cnc chart2) acc'
where where
add (tok:toks) item acc add (tok:toks) item acc
| isPrefixOf w tok = Map.insertWith (TMap.unionWith Set.union) tok (TMap.singleton toks (Set.singleton item)) acc | isPrefixOf w tok = Map.insertWith (TMap.unionWith Set.union) tok (TMap.singleton toks (Set.singleton item)) acc
add _ item acc = acc add _ item acc = acc
recoveryStates :: [Type] -> ErrorState -> (ParseState, Map.Map String ParseState) recoveryStates :: [Type] -> ErrorState -> (ParseState, Map.Map String ParseState)
recoveryStates open_types (EState pgf pinfo chart) = recoveryStates open_types (EState pgf cnc chart) =
let open_fcats = concatMap type2fcats open_types let open_fcats = concatMap type2fcats open_types
agenda = foldl (complete open_fcats) [] (actives chart) agenda = foldl (complete open_fcats) [] (actives chart)
(acc,chart1) = process Nothing add (sequences pinfo) (functions pinfo) agenda Map.empty chart (acc,chart1) = process Nothing add (sequences cnc) (functions cnc) agenda Map.empty chart
chart2 = chart1{ active =emptyAC chart2 = chart1{ active =emptyAC
, actives=active chart1 : actives chart1 , actives=active chart1 : actives chart1
, passive=emptyPC , passive=emptyPC
, offset =offset chart1+1 , offset =offset chart1+1
} }
in (PState pgf pinfo chart (TMap.singleton [] (Set.fromList agenda)), fmap (PState pgf pinfo chart2) acc) in (PState pgf cnc chart (TMap.singleton [] (Set.fromList agenda)), fmap (PState pgf cnc chart2) acc)
where where
type2fcats (DTyp _ cat _) = case Map.lookup cat (startCats pinfo) of type2fcats (DTyp _ cat _) = case Map.lookup cat (startCats cnc) of
Just (s,e,labels) -> range (s,e) Just (s,e,labels) -> range (s,e)
Nothing -> [] Nothing -> []
@@ -149,15 +146,15 @@ recoveryStates open_types (EState pgf pinfo chart) =
-- limited by the category specified, which is usually -- limited by the category specified, which is usually
-- the same as the startup category. -- the same as the startup category.
extractTrees :: ParseState -> Type -> [Tree] extractTrees :: ParseState -> Type -> [Tree]
extractTrees (PState pgf pinfo chart items) ty@(DTyp _ start _) = extractTrees (PState pgf cnc chart items) ty@(DTyp _ start _) =
nubsort [e1 | e <- exps, Right e1 <- [checkExpr pgf e ty]] nubsort [e1 | e <- exps, Right e1 <- [checkExpr pgf e ty]]
where where
(mb_agenda,acc) = TMap.decompose items (mb_agenda,acc) = TMap.decompose items
agenda = maybe [] Set.toList mb_agenda agenda = maybe [] Set.toList mb_agenda
(_,st) = process Nothing (\_ _ -> id) (sequences pinfo) (functions pinfo) agenda () chart (_,st) = process Nothing (\_ _ -> id) (sequences cnc) (functions cnc) agenda () chart
exps = exps =
case Map.lookup start (startCats pinfo) of case Map.lookup start (startCats cnc) of
Just (s,e,lbls) -> do cat <- range (s,e) Just (s,e,lbls) -> do cat <- range (s,e)
lbl <- indices lbls lbl <- indices lbls
Just fid <- [lookupPC (PK cat lbl 0) (passive st)] Just fid <- [lookupPC (PK cat lbl 0) (passive st)]
@@ -167,10 +164,10 @@ extractTrees (PState pgf pinfo chart items) ty@(DTyp _ start _) =
Nothing -> mzero Nothing -> mzero
go rec fcat' (d,fcat) go rec fcat' (d,fcat)
| fcat < totalCats pinfo = return (Set.empty,EMeta (fcat'*10+d)) -- FIXME: here we assume that every rule has at most 10 arguments | fcat < totalCats cnc = return (Set.empty,EMeta (fcat'*10+d)) -- FIXME: here we assume that every rule has at most 10 arguments
| Set.member fcat rec = mzero | Set.member fcat rec = mzero
| otherwise = foldForest (\funid args trees -> | otherwise = foldForest (\funid args trees ->
do let FFun fn lins = functions pinfo ! funid do let FFun fn lins = functions cnc ! funid
args <- mapM (go (Set.insert fcat rec) fcat) (zip [0..] args) args <- mapM (go (Set.insert fcat rec) fcat) (zip [0..] args)
check_ho_fun fn args check_ho_fun fn args
`mplus` `mplus`
@@ -348,7 +345,7 @@ foldForest f g b fcat forest =
-- | An abstract data type whose values represent -- | An abstract data type whose values represent
-- the current state in an incremental parser. -- the current state in an incremental parser.
data ParseState = PState PGF ParserInfo Chart (TMap.TrieMap String (Set.Set Active)) data ParseState = PState PGF Concr Chart (TMap.TrieMap String (Set.Set Active))
data Chart data Chart
= Chart = Chart
@@ -367,4 +364,4 @@ data Chart
-- | An abstract data type whose values represent -- | An abstract data type whose values represent
-- the state in an incremental parser after an error. -- the state in an incremental parser after an error.
data ErrorState = EState PGF ParserInfo Chart data ErrorState = EState PGF Concr Chart

View File

@@ -0,0 +1,89 @@
module PGF.Printer (ppPGF,ppCat,ppFun) where
import PGF.CId
import PGF.Data
import PGF.Macros
import GF.Data.Operations
import qualified Data.Map as Map
import qualified Data.Set as Set
import qualified Data.IntMap as IntMap
import Data.List
import Data.Array.IArray
import Data.Array.Unboxed
import Text.PrettyPrint
ppPGF :: PGF -> Doc
ppPGF pgf = ppAbs (absname pgf) (abstract pgf) $$ ppAll ppCnc (concretes pgf)
ppAbs :: Language -> Abstr -> Doc
ppAbs name a = text "abstract" <+> ppCId name <+> char '{' $$
nest 2 (ppAll ppCat (cats a) $$
ppAll ppFun (funs a)) $$
char '}'
ppCat :: CId -> [Hypo] -> Doc
ppCat c hyps = text "cat" <+> ppCId c <+> hsep (snd (mapAccumL ppHypo [] hyps))
ppFun :: CId -> (Type,Int,[Equation]) -> Doc
ppFun f (t,_,eqs) = text "fun" <+> ppCId f <+> colon <+> ppType 0 [] t $$
if null eqs
then empty
else text "def" <+> vcat [let (scope,ds) = mapAccumL (ppPatt 9) [] patts
in ppCId f <+> hsep ds <+> char '=' <+> ppExpr 0 scope res | Equ patts res <- eqs]
ppCnc :: Language -> Concr -> Doc
ppCnc name cnc =
text "concrete" <+> ppCId name <+> char '{' $$
nest 2 (text "productions" $$
nest 2 (vcat [ppProduction (fcat,prod) | (fcat,set) <- IntMap.toList (productions cnc), prod <- Set.toList set]) $$
text "functions" $$
nest 2 (vcat (map ppFFun (assocs (functions cnc)))) $$
text "sequences" $$
nest 2 (vcat (map ppSeq (assocs (sequences cnc)))) $$
text "startcats" $$
nest 2 (vcat (map ppStartCat (Map.toList (startCats cnc))))) $$
char '}'
ppProduction (fcat,FApply funid args) =
ppFCat fcat <+> text "->" <+> ppFunId funid <> brackets (hcat (punctuate comma (map ppFCat args)))
ppProduction (fcat,FCoerce arg) =
ppFCat fcat <+> text "->" <+> char '_' <> brackets (ppFCat arg)
ppProduction (fcat,FConst _ ss) =
ppFCat fcat <+> text "->" <+> ppStrs ss
ppFFun (funid,FFun fun arr) =
ppFunId funid <+> text ":=" <+> parens (hcat (punctuate comma (map ppSeqId (elems arr)))) <+> brackets (ppCId fun)
ppSeq (seqid,seq) =
ppSeqId seqid <+> text ":=" <+> hsep (map ppSymbol (elems seq))
ppStartCat (id,(start,end,labels)) =
ppCId id <+> text ":=" <+> (text "range " <+> brackets (ppFCat start <+> text ".." <+> ppFCat end) $$
text "labels" <+> brackets (vcat (map (text . show) (elems labels))))
ppSymbol (FSymCat d r) = char '<' <> int d <> comma <> int r <> char '>'
ppSymbol (FSymLit d r) = char '<' <> int d <> comma <> int r <> char '>'
ppSymbol (FSymKS ts) = ppStrs ts
ppSymbol (FSymKP ts alts) = text "pre" <+> braces (hsep (punctuate semi (ppStrs ts : map ppAlt alts)))
ppAlt (Alt ts ps) = ppStrs ts <+> char '/' <+> hsep (map (doubleQuotes . text) ps)
ppStrs ss = doubleQuotes (hsep (map text ss))
ppFCat fcat
| fcat == fcatString = text "CString"
| fcat == fcatInt = text "CInt"
| fcat == fcatFloat = text "CFloat"
| fcat == fcatVar = text "CVar"
| otherwise = char 'C' <> int fcat
ppFunId funid = char 'F' <> int funid
ppSeqId seqid = char 'S' <> int seqid
-- Utilities
ppAll :: (a -> b -> Doc) -> Map.Map a b -> Doc
ppAll p m = vcat [ p k v | (k,v) <- Map.toList m]