1
0
forked from GitHub/gf-core

PGF is now real synchronous PMCFG

This commit is contained in:
krasimir
2010-01-17 21:35:36 +00:00
parent 362f333ebd
commit a039808141
23 changed files with 296 additions and 599 deletions

View File

@@ -43,24 +43,18 @@ library
PGF.Expr
PGF.Type
PGF.Tree
PGF.PMCFG
PGF.Paraphrase
PGF.TypeCheck
PGF.Binary
PGF.Morphology
PGF.VisualizeTree
PGF.Printer
GF.Data.TrieMap
GF.Data.Utilities
GF.Data.SortedList
GF.Data.ErrM
GF.Data.Relation
GF.Data.Operations
-- needed only for the on demand generation of PMCFG
GF.Infra.GetOpt
GF.Infra.Option
GF.Data.ErrM
GF.Data.BacktrackM
GF.Compile.GeneratePMCFG
-- not really part of GF but I have changed the original binary library
-- and we have to keep the copy for now.
Data.Binary
@@ -141,7 +135,6 @@ executable gf
GF.Compile.Abstract.Compute
GF.Compile.Optimize
GF.Compile.SubExOpt
GF.Compile.OptimizePGF
GF.Compile.ModDeps
GF.Compile.GetGrammar
GF.Compile.PGFtoHaskell
@@ -156,7 +149,6 @@ executable gf
PGF.Expr
PGF.Type
PGF.Tree
PGF.PMCFG
PGF.Macros
PGF.Generate
PGF.Linearize
@@ -164,6 +156,7 @@ executable gf
PGF.Paraphrase
PGF.TypeCheck
PGF.Binary
PGF.Printer
GFC
GFI

View File

@@ -213,7 +213,7 @@ langsDemo = langsLang `except` ["Ara","Hin","Ina","Tha"]
langsParse = langs `only` ["Eng"]
-- languages for which langs.pgf is built
langsPGF = langsLang `except` ["Ara","Bul","Hin","Ron","Tha"]
langsPGF = langsLang `except` ["Ara","Hin","Ron","Tha"]
-- languages for which Compatibility exists (to be extended)
langsCompat = langsLang `only` ["Cat","Eng","Fin","Fre","Ita","Spa","Swe"]
@@ -297,15 +297,14 @@ unlexer abstr ls =
-- | Runs the gf executable in compile mode with the given arguments.
run_gfc :: PackageDescription -> LocalBuildInfo -> [String] -> IO ()
run_gfc pkg lbi args =
do let args' = ["-batch","-gf-lib-path="++rgl_src_dir] ++ filter (not . null) args ++ ["+RTS"] ++ rts_flags ++ ["-RTS"]
do let args' = ["-batch","-gf-lib-path="++rgl_src_dir] ++ filter (not . null) args
gf = default_gf pkg lbi
putStrLn $ "Running: " ++ gf ++ " " ++ unwords (map showArg args')
e <- rawSystem gf args'
case e of
ExitSuccess -> return ()
ExitFailure i -> die $ "gf exited with exit code: " ++ show i
where rts_flags = ["-K64M"]
showArg arg = "'" ++ arg ++ "'"
where showArg arg = "'" ++ arg ++ "'"
default_gf pkg lbi = buildDir lbi </> exeName' </> exeNameReal
where

View File

@@ -19,6 +19,7 @@ import PGF.VisualizeTree
import PGF.Macros
import PGF.Data ----
import PGF.Morphology
import PGF.Printer
import GF.Compile.Export
import GF.Infra.Option (noOptions, readOutputFormat, Encoding(..))
import GF.Infra.UseIO
@@ -752,22 +753,17 @@ allCommands cod env@(pgf, mos) = Map.fromList [
exec = \opts arg -> do
case arg of
[EFun id] -> case Map.lookup id (funs (abstract pgf)) of
Just (ty,_,eqs) -> return $ fromString $
render (text "fun" <+> ppCId id <+> colon <+> ppType 0 [] ty $$
if null eqs
then empty
else text "def" <+> vcat [let (scope,ds) = mapAccumL (ppPatt 9) [] patts
in ppCId id <+> hsep ds <+> char '=' <+> ppExpr 0 scope res | Equ patts res <- eqs])
Nothing -> case Map.lookup id (cats (abstract pgf)) of
Just hyps -> do return $ fromString $
render (text "cat" <+> ppCId id <+> hsep (snd (mapAccumL ppHypo [] hyps)) $$
if null (functionsToCat pgf id)
then empty
else space $$
text "fun" <+> vcat [ppCId fid <+> colon <+> ppType 0 [] ty
| (fid,ty) <- functionsToCat pgf id])
Nothing -> do putStrLn ("unknown category of function identifier "++show id)
return void
Just fd -> return $ fromString $
render (ppFun id fd)
Nothing -> case Map.lookup id (cats (abstract pgf)) of
Just hyps -> do return $ fromString $
render (ppCat id hyps $$
if null (functionsToCat pgf id)
then empty
else space $$
vcat [ppFun fid (ty,0,[]) | (fid,ty) <- functionsToCat pgf id])
Nothing -> do putStrLn ("unknown category of function identifier "++show id)
return void
[e] -> case inferExpr pgf e of
Left tcErr -> error $ render (ppTcError tcErr)
Right (e,ty) -> do putStrLn ("Expression: "++showExpr [] e)
@@ -782,8 +778,8 @@ allCommands cod env@(pgf, mos) = Map.fromList [
enc = encodeUnicode cod
par opts s = case optOpenTypes opts of
[] -> concat [parse pgf lang (optType opts) s | lang <- optLangs opts, canParse pgf lang]
open_typs -> concat [parseWithRecovery pgf lang (optType opts) open_typs s | lang <- optLangs opts, canParse pgf lang]
[] -> concat [parse pgf lang (optType opts) s | lang <- optLangs opts]
open_typs -> concat [parseWithRecovery pgf lang (optType opts) open_typs s | lang <- optLangs opts]
void = ([],[])

View File

@@ -6,7 +6,6 @@ import GF.Compile.Rename
import GF.Compile.CheckGrammar
import GF.Compile.Optimize
import GF.Compile.SubExOpt
import GF.Compile.OptimizePGF
import GF.Compile.GrammarToPGF
import GF.Compile.ReadFiles
import GF.Compile.Update
@@ -54,31 +53,16 @@ compileToPGF opts fs =
link :: Options -> String -> SourceGrammar -> IOE PGF
link opts cnc gr = do
let isv = (verbAtLeast opts Normal)
gc1 <- putPointE Normal opts "linking ... " $
let (abs,gc0) = mkCanon2gfcc opts cnc gr
in case checkPGF gc0 of
Ok (gc,b) -> do
case (isv,b) of
(True, True) -> ioeIO $ putStrLn "OK"
(False,True) -> return ()
_ -> ioeIO $ putStrLn $ "Corrupted PGF"
return gc
Bad s -> fail s
ioeIO $ buildParser opts $ optimize opts gc1
optimize :: Options -> PGF -> PGF
optimize opts = cse . suf
where os = flag optOptimizations opts
cse = if OptCSE `Set.member` os then cseOptimize else id
suf = if OptStem `Set.member` os then suffixOptimize else id
buildParser :: Options -> PGF -> IO PGF
buildParser opts =
case flag optBuildParser opts of
BuildParser -> addParsers opts
DontBuildParser -> return
BuildParserOnDemand -> return . mapConcretes (\cnc -> cnc { cflags = Map.insert (mkCId "parser") "ondemand" (cflags cnc) })
let isv = (verbAtLeast opts Normal)
putPointE Normal opts "linking ... " $ do
gc0 <- ioeIO (mkCanon2pgf opts cnc gr)
case checkPGF gc0 of
Ok (gc,b) -> do case (isv,b) of
(True, True) -> ioeIO $ putStrLn "OK"
(False,True) -> return ()
_ -> ioeIO $ putStrLn $ "Corrupted PGF"
return gc
Bad s -> fail s
batchCompile :: Options -> [FilePath] -> IOE SourceGrammar
batchCompile opts files = do

View File

@@ -2,10 +2,10 @@ module GF.Compile.Export where
import PGF.CId
import PGF.Data (PGF(..))
import PGF.Printer
import GF.Compile.PGFtoHaskell
import GF.Compile.PGFtoProlog
import GF.Compile.PGFtoJS
import GF.Compile.PGFPretty
import GF.Infra.Option
import GF.Speech.CFG
import GF.Speech.PGFToCFG
@@ -20,6 +20,7 @@ import GF.Speech.PrRegExp
import Data.Maybe
import System.FilePath
import Text.PrettyPrint
-- top-level access to code generation
@@ -29,8 +30,7 @@ exportPGF :: Options
-> [(FilePath,String)] -- ^ List of recommended file names and contents.
exportPGF opts fmt pgf =
case fmt of
FmtPGFPretty -> multi "txt" prPGFPretty
FmtPMCFGPretty -> single "pmcfg" prPMCFGPretty
FmtPGFPretty -> multi "txt" (render . ppPGF)
FmtJavaScript -> multi "js" pgf2js
FmtHaskell -> multi "hs" (grammar2haskell opts name)
FmtProlog -> multi "pl" grammar2prolog

View File

@@ -35,24 +35,20 @@ import Control.Exception
-- main conversion function
convertConcrete :: Options -> Abstr -> CId -> Concr -> IO ParserInfo
convertConcrete opts abs lang cnc = do
--convertConcrete :: Options -> Abstr -> CId -> Concr -> IO Concr
convertConcrete opts lang flags printnames abs_defs cnc_defs lincats params lin_defs = do
let env0 = emptyGrammarEnv cnc_defs cat_defs params
when (flag optProf opts) $ do
profileGrammar lang cnc_defs env0 pfrules
let env1 = expandHOAS abs_defs cnc_defs cat_defs lin_defs env0
env2 = List.foldl' (convertRule cnc_defs) env1 pfrules
return $ getParserInfo env2
return $ getParserInfo flags printnames env2
where
abs_defs = Map.assocs (funs abs)
cnc_defs = Map.union (opers cnc) (lins cnc) -- "union big+small most efficient"
cat_defs = Map.insert cidVar (S []) (lincats cnc)
params = paramlincats cnc
lin_defs = lindefs cnc
cat_defs = Map.insert cidVar (S []) lincats
pfrules = [
(PFRule id args (0,res) (map findLinType args) (findLinType (0,res)) term) |
(id, (ty,_,_)) <- abs_defs, let (args,res) = typeSkeleton ty,
(id, (ty,_,_)) <- Map.toList abs_defs, let (args,res) = typeSkeleton ty,
term <- maybeToList (Map.lookup id cnc_defs)]
findLinType (_,id) = fromMaybe (error $ "No lincat for " ++ show id) (Map.lookup id cat_defs)
@@ -364,7 +360,7 @@ expandHOAS abs_defs cnc_defs lincats lindefs env =
foldl add_varFun (foldl (\env ncat -> add_hoFun (add_hoCat env ncat) ncat) env hoTypes) (Map.keys lincats)
where
hoTypes :: [(Int,CId)]
hoTypes = sortNub [(n,c) | (_,(ty,_,_)) <- abs_defs
hoTypes = sortNub [(n,c) | (_,(ty,_,_)) <- Map.toList abs_defs
, (n,c) <- fst (typeSkeleton ty), n > 0]
-- add a range of PMCFG categories for each GF high-order category
@@ -438,16 +434,18 @@ addFCoercion env@(GrammarEnv last_id catSet seqSet funSet crcSet prodSet) sub_fc
Nothing -> let !fcat = last_id+1
in (GrammarEnv fcat catSet seqSet funSet (Map.insert sub_fcats fcat crcSet) prodSet,fcat)
getParserInfo :: GrammarEnv -> ParserInfo
getParserInfo (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) =
ParserInfo { functions = mkArray funSet
, sequences = mkArray seqSet
, productions = IntMap.union prodSet coercions
, pproductions = IntMap.empty
, lproductions = Map.empty
, startCats = maybe Map.empty (Map.map (\(start,end,_,lbls) -> (start,end,lbls))) (IntMap.lookup 0 catSet)
, totalCats = last_id+1
}
getParserInfo :: Map.Map CId String -> Map.Map CId String -> GrammarEnv -> Concr
getParserInfo flags printnames (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) =
Concr { cflags = flags
, printnames = printnames
, functions = mkArray funSet
, sequences = mkArray seqSet
, productions = IntMap.union prodSet coercions
, pproductions = IntMap.empty
, lproductions = Map.empty
, startCats = maybe Map.empty (Map.map (\(start,end,_,lbls) -> (start,end,lbls))) (IntMap.lookup 0 catSet)
, totalCats = last_id+1
}
where
mkArray map = array (0,Map.size map-1) [(v,k) | (k,v) <- Map.toList map]

View File

@@ -1,11 +1,12 @@
{-# LANGUAGE PatternGuards #-}
module GF.Compile.GrammarToPGF (mkCanon2gfcc,addParsers) where
module GF.Compile.GrammarToPGF (mkCanon2pgf) where
import GF.Compile.Export
import GF.Compile.GeneratePMCFG
import PGF.CId
import PGF.Macros(updateProductionIndices)
import PGF.Check(checkLin)
import qualified PGF.Macros as CM
import qualified PGF.Data as C
import qualified PGF.Data as D
@@ -36,28 +37,22 @@ traceD s t = t
-- the main function: generate PGF from GF.
mkCanon2gfcc :: Options -> String -> SourceGrammar -> (String,D.PGF)
mkCanon2gfcc opts cnc gr =
(showIdent abs, (canon2gfcc opts pars . reorder abs . canon2canon opts abs) gr)
mkCanon2pgf :: Options -> String -> SourceGrammar -> IO D.PGF
mkCanon2pgf opts cnc gr = (canon2pgf opts pars . reorder abs . canon2canon opts abs) gr
where
abs = err (const c) id $ M.abstractOfConcrete gr c where c = identC (BS.pack cnc)
pars = mkParamLincat gr
-- Adds parsers for all concretes
addParsers :: Options -> D.PGF -> IO D.PGF
addParsers opts pgf = do cncs <- sequence [conv lang cnc | (lang,cnc) <- Map.toList (D.concretes pgf)]
return $ updateProductionIndices $ pgf { D.concretes = Map.fromList cncs }
where
conv lang cnc = do pinfo <- convertConcrete opts (D.abstract pgf) lang cnc
return (lang,cnc { D.parser = Just pinfo })
-- Generate PGF from GFCM.
-- this assumes a grammar translated by canon2canon
canon2gfcc :: Options -> (Ident -> Ident -> C.Term) -> SourceGrammar -> D.PGF
canon2gfcc opts pars cgr@(M.MGrammar ((a,abm):cms)) =
(if dump opts DumpCanon then trace (render (vcat (map (ppModule Qualified) (M.modules cgr)))) else id) $
D.PGF an cns gflags abs cncs
canon2pgf :: Options -> (Ident -> Ident -> C.Term) -> SourceGrammar -> IO D.PGF
canon2pgf opts pars cgr@(M.MGrammar ((a,abm):cms)) = do
if dump opts DumpCanon
then putStrLn (render (vcat (map (ppModule Qualified) (M.modules cgr))))
else return ()
cncs <- sequence [mkConcr lang (i2i lang) mo | (lang,mo) <- cms]
return (D.PGF an cns gflags abs (Map.fromList cncs))
where
-- abstract
an = (i2i a)
@@ -82,13 +77,15 @@ canon2gfcc opts pars cgr@(M.MGrammar ((a,abm):cms)) =
catfuns = Map.fromList
[(cat,[f | (f, (C.DTyp _ c _,_,_)) <- lfuns, c==cat]) | (cat,_) <- lcats]
cncs = Map.fromList [mkConcr lang (i2i lang) mo | (lang,mo) <- cms]
mkConcr lang0 lang mo =
(lang,D.Concr flags lins opers lincats lindefs printnames params fcfg)
mkConcr lang0 lang mo = do
lins' <- case mapM (checkLin (funs,lins,lincats) lang) (Map.toList lins) of
Ok x -> return x
Bad msg -> fail msg
cnc <- convertConcrete opts lang flags printnames funs (Map.fromList (map fst lins')) lincats params lindefs
return (lang, cnc)
where
js = tree2list (M.jments mo)
flags = Map.fromList [(mkCId f,x) | (f,x) <- optionsPGF (M.flags mo)]
opers = Map.fromAscList [] -- opers will be created as optimization
utf = id -- trace (show lang0 +++ show flags) $
-- if moduleFlag optEncoding (moduleOptions (M.flags mo)) == UTF_8
-- then id else id

View File

@@ -1,118 +0,0 @@
module GF.Compile.OptimizePGF where
import PGF.CId
import PGF.Data
import PGF.Macros
import GF.Data.Operations
import Data.List
import qualified Data.Map as Map
-- back-end optimization:
-- suffix analysis followed by common subexpression elimination
optPGF :: PGF -> PGF
optPGF = cseOptimize . suffixOptimize
suffixOptimize :: PGF -> PGF
suffixOptimize = mapConcretes opt
where
opt cnc = cnc {
lins = Map.map optTerm (lins cnc),
lindefs = Map.map optTerm (lindefs cnc)
}
cseOptimize :: PGF -> PGF
cseOptimize = mapConcretes subex
-- analyse word form lists into prefix + suffixes
-- suffix sets can later be shared by subex elim
optTerm :: Term -> Term
optTerm tr = case tr of
R ts@(_:_:_) | all isK ts -> mkSuff $ optToks [s | K (KS s) <- ts]
R ts -> R $ map optTerm ts
P t v -> P (optTerm t) v
_ -> tr
where
optToks ss = prf : suffs where
prf = pref (head ss) (tail ss)
suffs = map (drop (length prf)) ss
pref cand ss = case ss of
s1:ss2 -> if isPrefixOf cand s1 then pref cand ss2 else pref (init cand) ss
_ -> cand
isK t = case t of
K (KS _) -> True
_ -> False
mkSuff ("":ws) = R (map (K . KS) ws)
mkSuff (p:ws) = W p (R (map (K . KS) ws))
-- common subexpression elimination
---subex :: [(CId,Term)] -> [(CId,Term)]
subex :: Concr -> Concr
subex cnc = err error id $ do
(tree,_) <- appSTM (getSubtermsMod cnc) (Map.empty,0)
return $ addSubexpConsts tree cnc
type TermList = Map.Map Term (Int,Int) -- number of occs, id
type TermM a = STM (TermList,Int) a
addSubexpConsts :: TermList -> Concr -> Concr
addSubexpConsts tree cnc = cnc {
opers = Map.fromList [(f,recomp f trm) | (f,trm) <- ops],
lins = rec lins,
lindefs = rec lindefs
}
where
ops = [(fid id, trm) | (trm,(_,id)) <- Map.assocs tree]
mkOne (f,trm) = (f, recomp f trm)
recomp f t = case Map.lookup t tree of
Just (_,id) | fid id /= f -> F $ fid id -- not to replace oper itself
_ -> case t of
R ts -> R $ map (recomp f) ts
S ts -> S $ map (recomp f) ts
W s t -> W s (recomp f t)
P t p -> P (recomp f t) (recomp f p)
_ -> t
fid n = mkCId $ "_" ++ show n
rec field = Map.fromAscList [(f,recomp f trm) | (f,trm) <- Map.assocs (field cnc)]
getSubtermsMod :: Concr -> TermM TermList
getSubtermsMod cnc = do
mapM getSubterms (Map.assocs (lins cnc))
mapM getSubterms (Map.assocs (lindefs cnc))
(tree0,_) <- readSTM
return $ Map.filter (\ (nu,_) -> nu > 1) tree0
where
getSubterms (f,trm) = collectSubterms trm >> return ()
collectSubterms :: Term -> TermM ()
collectSubterms t = case t of
R ts -> do
mapM collectSubterms ts
add t
S ts -> do
mapM collectSubterms ts
add t
W s u -> do
collectSubterms u
add t
P p u -> do
collectSubterms p
collectSubterms u
add t
_ -> return ()
where
add t = do
(ts,i) <- readSTM
let
((count,id),next) = case Map.lookup t ts of
Just (nu,id) -> ((nu+1,id), i)
_ -> ((1, i ), i+1)
writeSTM (Map.insert t (count,id) ts, next)

View File

@@ -1,94 +0,0 @@
-- | Print a part of a PGF grammar on the human-readable format used in
-- the paper "PGF: A Portable Run-Time Format for Type-Theoretical Grammars".
module GF.Compile.PGFPretty (prPGFPretty, prPMCFGPretty) where
import PGF.CId
import PGF.Data
import PGF.Macros
import PGF.PMCFG
import GF.Data.Operations
import Data.Map (Map)
import qualified Data.Map as Map
import Text.PrettyPrint.HughesPJ
prPGFPretty :: PGF -> String
prPGFPretty pgf = render $ prAbs (abstract pgf) $$ prAll (prCnc (abstract pgf)) (concretes pgf)
prPMCFGPretty :: PGF -> CId -> String
prPMCFGPretty pgf lang = render $
case lookParser pgf lang of
Nothing -> empty
Just pinfo -> text "language" <+> ppCId lang $$ ppPMCFG pinfo
prAbs :: Abstr -> Doc
prAbs a = prAll prCat (cats a) $$ prAll prFun (funs a)
prCat :: CId -> [Hypo] -> Doc
prCat c h | isLiteralCat c = empty
| otherwise = text "cat" <+> ppCId c
prFun :: CId -> (Type,Int,[Equation]) -> Doc
prFun f (t,_,_) = text "fun" <+> ppCId f <+> text ":" <+> prType t
prType :: Type -> Doc
prType t = parens (hsep (punctuate (text ",") (map ppCId cs))) <+> text "->" <+> ppCId c
where (cs,c) = catSkeleton t
-- FIXME: show concrete name
-- FIXME: inline opers first
prCnc :: Abstr -> CId -> Concr -> Doc
prCnc abstr name c = prAll prLinCat (lincats c) $$ prAll prLin (lins (expand c))
where
prLinCat :: CId -> Term -> Doc
prLinCat c t | isLiteralCat c = empty
| otherwise = text "lincat" <+> ppCId c <+> text "=" <+> pr 0 t
where
pr p (R ts) = prec p 1 (hsep (punctuate (text " *") (map (pr 1) ts)))
pr _ (S []) = text "Str"
pr _ (C n) = text "Int_" <> text (show (n+1))
prLin :: CId -> Term -> Doc
prLin f t = text "lin" <+> ppCId f <+> text "=" <+> pr 0 t
where
pr :: Int -> Term -> Doc
pr p (R ts) = text "<" <+> hsep (punctuate (text ",") (map (pr 0) ts)) <+> text ">"
pr p (P t1 t2) = prec p 3 (pr 3 t1 <> text "!" <> pr 3 t2)
pr p (S ts) = prec p 2 (hsep (punctuate (text " ++") (map (pr 2) ts)))
pr p (K (KS t)) = doubleQuotes (text t)
pr p (K _) = empty
pr p (V i) = text ("argv_" ++ show (i+1))
pr p (C i) = text (show (i+1))
pr p (FV ts) = prec p 1 (hsep (punctuate (text " |") (map (pr 1) ts)))
pr _ t = error $ "PGFPretty.prLin " ++ show t
linCat :: Concr -> CId -> Term
linCat cnc c = Map.findWithDefault (error $ "lincat: " ++ showCId c) c (lincats cnc)
prec :: Int -> Int -> Doc -> Doc
prec p m | p >= m = parens
| otherwise = id
expand :: Concr -> Concr
expand cnc = cnc { lins = Map.map (f "") (lins cnc) }
where
-- FIXME: handle KP
f :: String -> Term -> Term
f w (R ts) = R (map (f w) ts)
f w (P t1 t2) = P (f w t1) (f w t2)
f w (S []) = S (if null w then [] else [K (KS w)])
f w (S (t:ts)) = S (f w t : map (f "") ts)
f w (FV ts) = FV (map (f w) ts)
f w (W s t) = f (w++s) t
f w (K (KS t)) = K (KS (w++t))
f w (F o) = f w (Map.findWithDefault (error $ "Bad oper: " ++ showCId o) o (opers cnc))
f w t = t
-- Utilities
prAll :: (a -> b -> Doc) -> Map a b -> Doc
prAll p m = vcat [ p k v | (k,v) <- Map.toList m]

View File

@@ -29,7 +29,7 @@ pgf2js pgf =
start = showCId $ M.lookStartCat pgf
grammar = new "GFGrammar" [js_abstract, js_concrete]
js_abstract = abstract2js start as
js_concrete = JS.EObj $ map (concrete2js n) cs
js_concrete = JS.EObj $ map concrete2js cs
abstract2js :: String -> Abstr -> JS.Expr
abstract2js start ds = new "GFAbstract" [JS.EStr start, JS.EObj $ map absdef2js (Map.assocs (funs ds))]
@@ -39,18 +39,21 @@ absdef2js (f,(typ,_,_)) =
let (args,cat) = M.catSkeleton typ in
JS.Prop (JS.IdentPropName (JS.Ident (showCId f))) (new "Type" [JS.EArray [JS.EStr (showCId x) | x <- args], JS.EStr (showCId cat)])
concrete2js :: String -> (CId,Concr) -> JS.Property
concrete2js n (c, cnc) =
JS.Prop l (new "GFConcrete" ([flags,(JS.EObj $ ((map (cncdef2js n (showCId c)) ds) ++ litslins))] ++
maybe [] parser2js (parser cnc)))
concrete2js :: (CId,Concr) -> JS.Property
concrete2js (c,cnc) =
JS.Prop l (new "GFConcrete" [mapToJSObj JS.EStr $ cflags cnc,
JS.EObj $ [JS.Prop (JS.IntPropName cat) (JS.EArray (map frule2js (Set.toList set))) | (cat,set) <- IntMap.toList (productions cnc)],
JS.EArray $ (map ffun2js (Array.elems (functions cnc))),
JS.EArray $ (map seq2js (Array.elems (sequences cnc))),
JS.EObj $ map cats (Map.assocs (startCats cnc)),
JS.EInt (totalCats cnc)])
where
flags = mapToJSObj JS.EStr $ cflags cnc
l = JS.IdentPropName (JS.Ident (showCId c))
ds = concatMap Map.assocs [lins cnc, opers cnc, lindefs cnc]
litslins = [JS.Prop (JS.StringPropName "Int") (JS.EFun [children] [JS.SReturn $ new "Arr" [JS.EIndex (JS.EVar children) (JS.EInt 0)]]),
JS.Prop (JS.StringPropName "Float") (JS.EFun [children] [JS.SReturn $ new "Arr" [JS.EIndex (JS.EVar children) (JS.EInt 0)]]),
JS.Prop (JS.StringPropName "String") (JS.EFun [children] [JS.SReturn $ new "Arr" [JS.EIndex (JS.EVar children) (JS.EInt 0)]])]
cats (c,(start,end,_)) = JS.Prop (JS.IdentPropName (JS.Ident (showCId c))) (JS.EObj [JS.Prop (JS.IdentPropName (JS.Ident "s")) (JS.EInt start)
,JS.Prop (JS.IdentPropName (JS.Ident "e")) (JS.EInt end)])
cncdef2js :: String -> String -> (CId,Term) -> JS.Property
cncdef2js n l (f, t) = JS.Prop (JS.IdentPropName (JS.Ident (showCId f))) (JS.EFun [children] [JS.SReturn (term2js n l t)])
@@ -88,17 +91,6 @@ argIdent n = JS.Ident ("x" ++ show n)
children :: JS.Ident
children = JS.Ident "cs"
-- Parser
parser2js :: ParserInfo -> [JS.Expr]
parser2js p = [new "Parser" [JS.EObj $ [JS.Prop (JS.IntPropName cat) (JS.EArray (map frule2js (Set.toList set))) | (cat,set) <- IntMap.toList (productions p)],
JS.EArray $ (map ffun2js (Array.elems (functions p))),
JS.EArray $ (map seq2js (Array.elems (sequences p))),
JS.EObj $ map cats (Map.assocs (startCats p)),
JS.EInt (totalCats p)]]
where
cats (c,(start,end,_)) = JS.Prop (JS.IdentPropName (JS.Ident (showCId c))) (JS.EObj [JS.Prop (JS.IdentPropName (JS.Ident "s")) (JS.EInt start)
,JS.Prop (JS.IdentPropName (JS.Ident "e")) (JS.EInt end)])
frule2js :: Production -> JS.Expr
frule2js (FApply funid args) = new "Rule" [JS.EInt funid, JS.EArray (map JS.EInt args)]
frule2js (FCoerce arg) = new "Coerce" [JS.EInt arg]

View File

@@ -88,20 +88,11 @@ plFundef (fun, (_,_,eqs)) = [plFact "def" [plp fun, plp fundef']]
-- concrete syntax
plConcrete :: (CId, Concr) -> [String]
plConcrete (cncname, Concr cflags lins opers lincats lindefs
_printnames _paramlincats _parser) =
plConcrete (cncname, cnc) =
["", "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%",
"%% concrete module: " ++ plp cncname] ++
clauseHeader "%% cncflag(?Flag, ?Value): flags for concrete syntax"
(map (mod . plpFact2 "cncflag") (Map.assocs cflags)) ++
clauseHeader "%% lincat(?Cat, ?Linearization type)"
(map (mod . plpFact2 "lincat") (Map.assocs lincats)) ++
clauseHeader "%% lindef(?Cat, ?Linearization default)"
(map (mod . plpFact2 "lindef") (Map.assocs lindefs)) ++
clauseHeader "%% lin(?Fun, ?Linearization)"
(map (mod . plpFact2 "lin") (Map.assocs lins)) ++
clauseHeader "%% oper(?Oper, ?Linearization)"
(map (mod . plpFact2 "oper") (Map.assocs opers))
(map (mod . plpFact2 "cncflag") (Map.assocs (cflags cnc)))
where mod clause = plp cncname ++ ": " ++ clause

View File

@@ -5,7 +5,7 @@ module GF.Infra.Option
Flags(..),
Mode(..), Phase(..), Verbosity(..), Encoding(..), OutputFormat(..),
SISRFormat(..), Optimization(..), CFGTransform(..), HaskellOption(..),
Dump(..), Printer(..), Recomp(..), BuildParser(..),
Dump(..), Printer(..), Recomp(..),
-- * Option parsing
parseOptions, parseModuleOptions, fixRelativeLibPaths,
-- * Option pretty-printing
@@ -81,7 +81,6 @@ data Encoding = UTF_8 | ISO_8859_1 | CP_1250 | CP_1251 | CP_1252
deriving (Eq,Ord)
data OutputFormat = FmtPGFPretty
| FmtPMCFGPretty
| FmtJavaScript
| FmtHaskell
| FmtProlog
@@ -137,9 +136,6 @@ data Printer = PrinterStrip -- ^ Remove name qualifiers.
data Recomp = AlwaysRecomp | RecompIfNewer | NeverRecomp
deriving (Show,Eq,Ord)
data BuildParser = BuildParser | DontBuildParser | BuildParserOnDemand
deriving (Show,Eq,Ord)
data Flags = Flags {
optMode :: Mode,
optStopAfterPhase :: Phase,
@@ -172,7 +168,6 @@ data Flags = Flags {
optSpeechLanguage :: Maybe String,
optLexer :: Maybe String,
optUnlexer :: Maybe String,
optBuildParser :: BuildParser,
optWarnings :: [Warning],
optDump :: [Dump]
}
@@ -218,7 +213,6 @@ optionsPGF :: Options -> [(String,String)]
optionsPGF opts =
maybe [] (\x -> [("language",x)]) (flag optSpeechLanguage opts)
++ maybe [] (\x -> [("startcat",x)]) (flag optStartCat opts)
++ (if flag optBuildParser opts == BuildParserOnDemand then [("parser","ondemand")] else [])
-- Option manipulation
@@ -274,7 +268,6 @@ defaultFlags = Flags {
optSpeechLanguage = Nothing,
optLexer = Nothing,
optUnlexer = Nothing,
optBuildParser = BuildParser,
optWarnings = [],
optDump = []
}
@@ -351,7 +344,6 @@ optDescr =
Option [] ["coding"] (ReqArg coding "ENCODING")
("Character encoding of the source grammar, ENCODING = "
++ concat (intersperse " | " (map fst encodings)) ++ "."),
Option [] ["parser"] (ReqArg buildParser "VALUE") "Build parser (default on). VALUE = on | off | ondemand",
Option [] ["startcat"] (ReqArg startcat "CAT") "Grammar start category.",
Option [] ["language"] (ReqArg language "LANG") "Set the speech language flag to LANG in the generated grammar.",
Option [] ["lexer"] (ReqArg lexer "LEXER") "Use lexer LEXER.",
@@ -410,11 +402,6 @@ optDescr =
coding x = case lookup x encodings of
Just c -> set $ \o -> o { optEncoding = c }
Nothing -> fail $ "Unknown character encoding: " ++ x
buildParser x = do v <- case x of
"on" -> return BuildParser
"off" -> return DontBuildParser
"ondemand" -> return BuildParserOnDemand
set $ \o -> o { optBuildParser = v }
startcat x = set $ \o -> o { optStartCat = Just x }
language x = set $ \o -> o { optSpeechLanguage = Just x }
lexer x = set $ \o -> o { optLexer = Just x }
@@ -441,7 +428,6 @@ optDescr =
outputFormats :: [(String,OutputFormat)]
outputFormats =
[("pgf_pretty", FmtPGFPretty),
("pmcfg_pretty", FmtPMCFGPretty),
("js", FmtJavaScript),
("haskell", FmtHaskell),
("prolog", FmtProlog),

View File

@@ -34,15 +34,15 @@ pgfToCFG :: PGF
-> CFG
pgfToCFG pgf lang = mkCFG (showCId (lookStartCat pgf)) extCats (startRules ++ concatMap fruleToCFRule rules)
where
pinfo = fromMaybe (error "pgfToCFG: No parser.") (lookParser pgf lang)
cnc = lookConcr pgf lang
rules :: [(FCat,Production)]
rules = [(fcat,prod) | (fcat,set) <- IntMap.toList (PGF.pproductions pinfo)
rules = [(fcat,prod) | (fcat,set) <- IntMap.toList (PGF.pproductions cnc)
, prod <- Set.toList set]
fcatCats :: Map FCat Cat
fcatCats = Map.fromList [(fc, showCId c ++ "_" ++ show i)
| (c,(s,e,lbls)) <- Map.toList (startCats pinfo),
| (c,(s,e,lbls)) <- Map.toList (startCats cnc),
(fc,i) <- zip (range (s,e)) [1..]]
fcatCat :: FCat -> Cat
@@ -58,9 +58,9 @@ pgfToCFG pgf lang = mkCFG (showCId (lookStartCat pgf)) extCats (startRules ++ co
topdownRules cat = f cat []
where
f cat rules = maybe rules (Set.fold g rules) (IntMap.lookup cat (pproductions pinfo))
f cat rules = maybe rules (Set.fold g rules) (IntMap.lookup cat (pproductions cnc))
g (FApply funid args) rules = (functions pinfo ! funid,args) : rules
g (FApply funid args) rules = (functions cnc ! funid,args) : rules
g (FCoerce cat) rules = f cat rules
@@ -69,7 +69,7 @@ pgfToCFG pgf lang = mkCFG (showCId (lookStartCat pgf)) extCats (startRules ++ co
startRules :: [CFRule]
startRules = [CFRule (showCId c) [NonTerminal (fcatToCat fc r)] (CFRes 0)
| (c,(s,e,lbls)) <- Map.toList (startCats pinfo),
| (c,(s,e,lbls)) <- Map.toList (startCats cnc),
fc <- range (s,e), not (isLiteralFCat fc),
r <- [0..catLinArity fc-1]]
@@ -77,10 +77,10 @@ pgfToCFG pgf lang = mkCFG (showCId (lookStartCat pgf)) extCats (startRules ++ co
fruleToCFRule (c,FApply funid args) =
[CFRule (fcatToCat c l) (mkRhs row) (profilesToTerm [fixProfile row n | n <- [0..length args-1]])
| (l,seqid) <- Array.assocs rhs
, let row = sequences pinfo ! seqid
, let row = sequences cnc ! seqid
, not (containsLiterals row)]
where
FFun f rhs = functions pinfo ! funid
FFun f rhs = functions cnc ! funid
mkRhs :: Array FPointPos FSymbol -> [CFSymbol]
mkRhs = concatMap fsymbolToSymbol . Array.elems

View File

@@ -54,7 +54,7 @@ module PGF(
showPrintName,
-- ** Parsing
parse, parseWithRecovery, canParse, parseAllLang, parseAll,
parse, parseWithRecovery, parseAllLang, parseAll,
-- ** Evaluation
PGF.compute, paraphrase,
@@ -106,9 +106,7 @@ import PGF.Morphology
import PGF.Data hiding (functions)
import PGF.Binary
import qualified PGF.Parse as Parse
import qualified GF.Compile.GeneratePMCFG as PMCFG
import GF.Infra.Option
import GF.Data.Utilities (replace)
import Data.Char
@@ -144,9 +142,6 @@ parse :: PGF -> Language -> Type -> String -> [Tree]
parseWithRecovery :: PGF -> Language -> Type -> [Type] -> String -> [Tree]
-- | Checks whether the given language can be used for parsing.
canParse :: PGF -> Language -> Bool
-- | The same as 'linearizeAllLang' but does not return
-- the language.
linearizeAll :: PGF -> Tree -> [String]
@@ -228,31 +223,17 @@ complete :: PGF -> Language -> Type -> String
-- Implementation
---------------------------------------------------
readPGF f = decodeFile f >>= addParsers
-- Adds parsers for all concretes that don't have a parser and that have parser=ondemand.
addParsers :: PGF -> IO PGF
addParsers pgf = do cncs <- sequence [if wantsParser cnc then addParser lang cnc else return (lang,cnc)
| (lang,cnc) <- Map.toList (concretes pgf)]
return pgf { concretes = Map.fromList cncs }
where
wantsParser cnc = isNothing (parser cnc) && Map.lookup (mkCId "parser") (cflags cnc) == Just "ondemand"
addParser lang cnc = do pinfo <- PMCFG.convertConcrete noOptions (abstract pgf) lang cnc
return (lang,cnc { parser = Just pinfo })
readPGF f = decodeFile f
linearize pgf lang = concat . take 1 . PGF.Linearize.linearizes pgf lang
parse pgf lang typ s =
case Map.lookup lang (concretes pgf) of
Just cnc -> case parser cnc of
Just pinfo -> Parse.parse pgf lang typ (words s)
Nothing -> error ("No parser built for language: " ++ showCId lang)
Just cnc -> Parse.parse pgf lang typ (words s)
Nothing -> error ("Unknown language: " ++ showCId lang)
parseWithRecovery pgf lang typ open_typs s = Parse.parseWithRecovery pgf lang typ open_typs (words s)
canParse pgf cnc = isJust (lookParser pgf cnc)
linearizeAll mgr = map snd . linearizeAllLang mgr
linearizeAllLang mgr t =
[(lang,PGF.linearize mgr lang t) | lang <- languages mgr]
@@ -260,7 +241,7 @@ linearizeAllLang mgr t =
parseAll mgr typ = map snd . parseAllLang mgr typ
parseAllLang mgr typ s =
[(lang,ts) | lang <- languages mgr, canParse mgr lang, let ts = parse mgr lang typ s, not (null ts)]
[(lang,ts) | lang <- languages mgr, let ts = parse mgr lang typ s, not (null ts)]
generateRandom pgf cat = do
gen <- newStdGen

View File

@@ -51,24 +51,24 @@ instance Binary Abstr where
})
instance Binary Concr where
put cnc = put ( cflags cnc, lins cnc, opers cnc
, lincats cnc, lindefs cnc
, printnames cnc, paramlincats cnc
, parser cnc
put cnc = put ( cflags cnc, printnames cnc
, functions cnc, sequences cnc
, productions cnc
, totalCats cnc, startCats cnc
)
get = do cflags <- get
lins <- get
opers <- get
lincats <- get
lindefs <- get
printnames <- get
paramlincats <- get
parser <- get
return (Concr{ cflags=cflags, lins=lins, opers=opers
, lincats=lincats, lindefs=lindefs
, printnames=printnames
, paramlincats=paramlincats
, parser=parser
get = do cflags <- get
printnames <- get
functions <- get
sequences <- get
productions <- get
totalCats <- get
startCats <- get
return (Concr{ cflags=cflags, printnames=printnames
, functions=functions,sequences=sequences
, productions = productions
, pproductions = IntMap.empty
, lproductions = Map.empty
, totalCats=totalCats,startCats=startCats
})
instance Binary Alternative where
@@ -186,17 +186,4 @@ instance Binary Production where
1 -> liftM FCoerce get
_ -> decodingError
instance Binary ParserInfo where
put p = put (functions p, sequences p, productions p, totalCats p, startCats p)
get = do functions <- get
sequences <- get
productions <- get
totalCats <- get
startCats <- get
return (ParserInfo{functions=functions,sequences=sequences
,productions = productions
,pproductions = IntMap.empty
,lproductions = Map.empty
,totalCats=totalCats,startCats=startCats})
decodingError = fail "This PGF file was compiled with different version of GF"

View File

@@ -1,4 +1,4 @@
module PGF.Check (checkPGF) where
module PGF.Check (checkPGF,checkLin) where
import PGF.CId
import PGF.Data
@@ -7,14 +7,15 @@ import GF.Data.ErrM
import qualified Data.Map as Map
import Control.Monad
import Data.Maybe(fromMaybe)
import Debug.Trace
checkPGF :: PGF -> Err (PGF,Bool)
checkPGF pgf = do
checkPGF pgf = return (pgf,True) {- do
(cs,bs) <- mapM (checkConcrete pgf)
(Map.assocs (concretes pgf)) >>= return . unzip
return (pgf {concretes = Map.fromAscList cs}, and bs)
-}
-- errors are non-fatal; replace with 'fail' to change this
msg s = trace s (return ())
@@ -27,7 +28,7 @@ labelBoolErr ms iob = do
(x,b) <- iob
if b then return (x,b) else (msg ms >> return (x,b))
{-
checkConcrete :: PGF -> (CId,Concr) -> Err ((CId,Concr),Bool)
checkConcrete pgf (lang,cnc) =
labelBoolErr ("happened in language " ++ showCId lang) $ do
@@ -35,8 +36,11 @@ checkConcrete pgf (lang,cnc) =
return ((lang,cnc{lins = Map.fromAscList rs}),and bs)
where
checkl = checkLin pgf lang
-}
checkLin :: PGF -> CId -> (CId,Term) -> Err ((CId,Term),Bool)
type PGFSig = (Map.Map CId (Type,Int,[Equation]),Map.Map CId Term,Map.Map CId Term)
checkLin :: PGFSig -> CId -> (CId,Term) -> Err ((CId,Term),Bool)
checkLin pgf lang (f,t) =
labelBoolErr ("happened in function " ++ showCId f) $ do
(t',b) <- checkTerm (lintype pgf lang f) t --- $ inline pgf lang t
@@ -124,8 +128,8 @@ ints = C
str :: CType
str = S []
lintype :: PGF -> CId -> CId -> LinType
lintype pgf lang fun = case typeSkeleton (lookType pgf fun) of
lintype :: PGFSig -> CId -> CId -> LinType
lintype pgf lang fun = case typeSkeleton (lookFun pgf fun) of
(cs,c) -> (map vlinc cs, linc c) ---- HOAS
where
linc = lookLincat pgf lang
@@ -133,7 +137,7 @@ lintype pgf lang fun = case typeSkeleton (lookType pgf fun) of
vlinc (i,c) = case linc c of
R ts -> R (ts ++ replicate i str)
inline :: PGF -> CId -> Term -> Term
inline :: PGFSig -> CId -> Term -> Term
inline pgf lang t = case t of
F c -> inl $ look c
_ -> composSafeOp inl t
@@ -171,3 +175,7 @@ err :: (String -> b) -> (a -> b) -> Err a -> b
err d f e = case e of
Ok a -> f a
Bad s -> d s
lookFun (abs,lin,lincats) f = (\(a,b,c) -> a) $ fromMaybe (error "No abs") (Map.lookup f abs)
lookLincat (abs,lin,lincats) _ c = fromMaybe (error "No lincat") (Map.lookup c lincats)
lookLin (abs,lin,lincats) _ f = fromMaybe (error "No lin") (Map.lookup f lin)

View File

@@ -1,15 +1,17 @@
module PGF.Data (module PGF.Data, module PGF.Expr, module PGF.Type, module PGF.PMCFG) where
module PGF.Data (module PGF.Data, module PGF.Expr, module PGF.Type) where
import PGF.CId
import PGF.Expr hiding (Value, Env, Tree)
import PGF.Type
import PGF.PMCFG
import qualified Data.Map as Map
import qualified Data.Set as Set
import qualified Data.IntMap as IntMap
import Data.Array.IArray
import Data.Array.Unboxed
import Data.List
-- internal datatypes for PGF
-- | An abstract data type representing multilingual grammar
@@ -30,16 +32,40 @@ data Abstr = Abstr {
}
data Concr = Concr {
cflags :: Map.Map CId String, -- value of a flag
lins :: Map.Map CId Term, -- lin of a fun
opers :: Map.Map CId Term, -- oper generated by subex elim
lincats :: Map.Map CId Term, -- lin type of a cat
lindefs :: Map.Map CId Term, -- lin default of a cat
printnames :: Map.Map CId String, -- printname of a cat or a fun
paramlincats :: Map.Map CId Term, -- lin type of cat, with printable param names
parser :: Maybe ParserInfo -- parser
cflags :: Map.Map CId String, -- value of a flag
printnames :: Map.Map CId String, -- printname of a cat or a fun
functions :: Array FunId FFun,
sequences :: Array SeqId FSeq,
productions :: IntMap.IntMap (Set.Set Production), -- the original productions loaded from the PGF file
pproductions :: IntMap.IntMap (Set.Set Production), -- productions needed for parsing
lproductions :: Map.Map CId (IntMap.IntMap (Set.Set Production)), -- productions needed for linearization
startCats :: Map.Map CId (FCat,FCat,Array FIndex String), -- for every category - start/end FCat and a list of label names
totalCats :: {-# UNPACK #-} !FCat
}
type FCat = Int
type FIndex = Int
type FPointPos = Int
data FSymbol
= FSymCat {-# UNPACK #-} !Int {-# UNPACK #-} !FIndex
| FSymLit {-# UNPACK #-} !Int {-# UNPACK #-} !FIndex
| FSymKS [String]
| FSymKP [String] [Alternative]
deriving (Eq,Ord,Show)
data Production
= FApply {-# UNPACK #-} !FunId [FCat]
| FCoerce {-# UNPACK #-} !FCat
| FConst Expr [String]
deriving (Eq,Ord,Show)
data FFun = FFun CId {-# UNPACK #-} !(UArray FIndex SeqId) deriving (Eq,Ord,Show)
type FSeq = Array FPointPos FSymbol
type FunId = Int
type SeqId = Int
data Alternative =
Alt [String] [String]
deriving (Eq,Ord,Show)
data Term =
R [Term]
| P Term Term
@@ -59,7 +85,7 @@ data Tokn =
deriving (Eq,Ord,Show)
-- merge two GFCCs; fails is differens absnames; priority to second arg
-- merge two PGFs; fails is differens absnames; priority to second arg
unionPGF :: PGF -> PGF -> PGF
unionPGF one two = case absname one of
@@ -93,3 +119,12 @@ readLanguage = readCId
showLanguage :: Language -> String
showLanguage = showCId
fcatString, fcatInt, fcatFloat, fcatVar :: Int
fcatString = (-1)
fcatInt = (-2)
fcatFloat = (-3)
fcatVar = (-4)
isLiteralFCat :: FCat -> Bool
isLiteralFCat = (`elem` [fcatString, fcatInt, fcatFloat, fcatVar])

View File

@@ -3,7 +3,6 @@ module PGF.Linearize(linearizes,markLinearizes,tabularLinearizes) where
import PGF.CId
import PGF.Data
import PGF.Macros
import Data.Maybe (fromJust)
import Data.Array.IArray
import Data.List
import Control.Monad
@@ -22,8 +21,7 @@ linTree :: PGF -> Language -> (Maybe CId -> [Int] -> LinTable -> LinTable) -> Ex
linTree pgf lang mark e = lin0 [] [] [] Nothing e
where
cnc = lookMap (error "no lang") lang (concretes pgf)
pinfo = fromJust (parser cnc)
lp = lproductions pinfo
lp = lproductions cnc
lin0 path xs ys mb_fid (EAbs _ x e) = lin0 path (showCId x:xs) ys mb_fid e
lin0 path xs ys mb_fid (ETyped e _) = lin0 path xs ys mb_fid e
@@ -50,7 +48,7 @@ linTree pgf lang mark e = lin0 [] [] [] Nothing e
case prod of
FApply funid fids -> do guard (length fids == length es)
args <- sequence (zipWith3 (\i fid e -> lin0 (sub i path) [] xs (Just fid) e) [0..] fids es)
let (FFun _ lins) = functions pinfo ! funid
let (FFun _ lins) = functions cnc ! funid
return (listArray (bounds lins) [computeSeq seqid args | seqid <- elems lins])
FCoerce fid -> apply path xs (Just fid) f es
Nothing -> mzero
@@ -70,7 +68,7 @@ linTree pgf lang mark e = lin0 [] [] [] Nothing e
computeSeq seqid args = concatMap compute (elems seq)
where
seq = sequences pinfo ! seqid
seq = sequences cnc ! seqid
compute (FSymCat d r) = (args !! d) ! r
compute (FSymLit d r) = (args !! d) ! r
@@ -94,7 +92,7 @@ tabularLinearizes pgf lang e = map (zip lbls . map (unwords . untokn) . elems) (
where
lbls = case unApp e of
Just (f,_) -> let cat = valCat (lookType pgf f)
in case parser (lookConcr pgf lang) >>= Map.lookup cat . startCats of
in case Map.lookup cat (startCats (lookConcr pgf lang)) of
Just (_,_,lbls) -> elems lbls
Nothing -> error "No labels"
Nothing -> error "Not function application"

View File

@@ -17,22 +17,6 @@ import GF.Data.Utilities(sortNub)
mapConcretes :: (Concr -> Concr) -> PGF -> PGF
mapConcretes f pgf = pgf { concretes = Map.map f (concretes pgf) }
lookLin :: PGF -> CId -> CId -> Term
lookLin pgf lang fun =
lookMap tm0 fun $ lins $ lookMap (error "no lang") lang $ concretes pgf
lookOper :: PGF -> CId -> CId -> Term
lookOper pgf lang fun =
lookMap tm0 fun $ opers $ lookMap (error "no lang") lang $ concretes pgf
lookLincat :: PGF -> CId -> CId -> Term
lookLincat pgf lang fun =
lookMap tm0 fun $ lincats $ lookMap (error "no lang") lang $ concretes pgf
lookParamLincat :: PGF -> CId -> CId -> Term
lookParamLincat pgf lang fun =
lookMap tm0 fun $ paramlincats $ lookMap (error "no lang") lang $ concretes pgf
lookType :: PGF -> CId -> Type
lookType pgf f =
case lookMap (error $ "lookType " ++ show f) f (funs (abstract pgf)) of
@@ -52,9 +36,6 @@ isData pgf f =
lookValCat :: PGF -> CId -> CId
lookValCat pgf = valCat . lookType pgf
lookParser :: PGF -> CId -> Maybe ParserInfo
lookParser pgf lang = Map.lookup lang (concretes pgf) >>= parser
lookStartCat :: PGF -> CId
lookStartCat pgf = mkCId $ fromMaybe "S" $ msum $ Data.List.map (Map.lookup (mkCId "startcat"))
[gflags pgf, aflags (abstract pgf)]
@@ -86,7 +67,7 @@ missingLins pgf lang = [c | c <- fs, not (hasl c)] where
hasl = hasLin pgf lang
hasLin :: PGF -> CId -> CId -> Bool
hasLin pgf lang f = Map.member f $ lins $ lookConcr pgf lang
hasLin pgf lang f = Map.member f $ lproductions $ lookConcr pgf lang
restrictPGF :: (CId -> Bool) -> PGF -> PGF
restrictPGF cond pgf = pgf {
@@ -164,13 +145,11 @@ updateProductionIndices :: PGF -> PGF
updateProductionIndices pgf = pgf{concretes = fmap updateConcrete (concretes pgf)}
where
updateConcrete cnc =
case parser cnc of
Nothing -> cnc
Just pinfo -> let prods0 = filterProductions (productions pinfo)
p_prods = parseIndex pinfo prods0
l_prods = linIndex pinfo prods0
in cnc{parser = Just pinfo{pproductions = p_prods, lproductions = l_prods}}
let prods0 = filterProductions (productions cnc)
p_prods = parseIndex cnc prods0
l_prods = linIndex cnc prods0
in cnc{pproductions = p_prods, lproductions = l_prods}
filterProductions prods0
| IntMap.size prods == IntMap.size prods0 = prods
| otherwise = filterProductions prods

View File

@@ -20,7 +20,7 @@ newtype Morpho = Morpho (Map.Map String [(Lemma,Analysis)])
buildMorpho :: PGF -> Language -> Morpho
buildMorpho pgf lang = Morpho $
case Map.lookup lang (concretes pgf) >>= parser of
case Map.lookup lang (concretes pgf) of
Just pinfo -> collectWords pinfo
Nothing -> Map.empty

View File

@@ -1,101 +0,0 @@
module PGF.PMCFG where
import PGF.CId
import PGF.Expr
import qualified Data.Map as Map
import qualified Data.Set as Set
import qualified Data.IntMap as IntMap
import Data.Array.IArray
import Data.Array.Unboxed
import Text.PrettyPrint
type FCat = Int
type FIndex = Int
type FPointPos = Int
data FSymbol
= FSymCat {-# UNPACK #-} !Int {-# UNPACK #-} !FIndex
| FSymLit {-# UNPACK #-} !Int {-# UNPACK #-} !FIndex
| FSymKS [String]
| FSymKP [String] [Alternative]
deriving (Eq,Ord,Show)
data Production
= FApply {-# UNPACK #-} !FunId [FCat]
| FCoerce {-# UNPACK #-} !FCat
| FConst Expr [String]
deriving (Eq,Ord,Show)
data FFun = FFun CId {-# UNPACK #-} !(UArray FIndex SeqId) deriving (Eq,Ord,Show)
type FSeq = Array FPointPos FSymbol
type FunId = Int
type SeqId = Int
data Alternative =
Alt [String] [String]
deriving (Eq,Ord,Show)
data ParserInfo
= ParserInfo { functions :: Array FunId FFun
, sequences :: Array SeqId FSeq
, productions :: IntMap.IntMap (Set.Set Production) -- the original productions loaded from the PGF file
, pproductions :: IntMap.IntMap (Set.Set Production) -- productions needed for parsing
, lproductions :: Map.Map CId (IntMap.IntMap (Set.Set Production)) -- productions needed for linearization
, startCats :: Map.Map CId (FCat,FCat,Array FIndex String) -- for every category - start/end FCat and a list of label names
, totalCats :: {-# UNPACK #-} !FCat
}
fcatString, fcatInt, fcatFloat, fcatVar :: Int
fcatString = (-1)
fcatInt = (-2)
fcatFloat = (-3)
fcatVar = (-4)
isLiteralFCat :: FCat -> Bool
isLiteralFCat = (`elem` [fcatString, fcatInt, fcatFloat, fcatVar])
ppPMCFG :: ParserInfo -> Doc
ppPMCFG pinfo =
text "productions" $$
nest 2 (vcat [ppProduction (fcat,prod) | (fcat,set) <- IntMap.toList (productions pinfo), prod <- Set.toList set]) $$
text "functions" $$
nest 2 (vcat (map ppFun (assocs (functions pinfo)))) $$
text "sequences" $$
nest 2 (vcat (map ppSeq (assocs (sequences pinfo)))) $$
text "startcats" $$
nest 2 (vcat (map ppStartCat (Map.toList (startCats pinfo))))
ppProduction (fcat,FApply funid args) =
ppFCat fcat <+> text "->" <+> ppFunId funid <> brackets (hcat (punctuate comma (map ppFCat args)))
ppProduction (fcat,FCoerce arg) =
ppFCat fcat <+> text "->" <+> char '_' <> brackets (ppFCat arg)
ppProduction (fcat,FConst _ ss) =
ppFCat fcat <+> text "->" <+> ppStrs ss
ppFun (funid,FFun fun arr) =
ppFunId funid <+> text ":=" <+> parens (hcat (punctuate comma (map ppSeqId (elems arr)))) <+> brackets (ppCId fun)
ppSeq (seqid,seq) =
ppSeqId seqid <+> text ":=" <+> hsep (map ppSymbol (elems seq))
ppStartCat (id,(start,end,labels)) =
ppCId id <+> text ":=" <+> (text "range " <+> brackets (ppFCat start <+> text ".." <+> ppFCat end) $$
text "labels" <+> brackets (vcat (map (text . show) (elems labels))))
ppSymbol (FSymCat d r) = char '<' <> int d <> comma <> int r <> char '>'
ppSymbol (FSymLit d r) = char '<' <> int d <> comma <> int r <> char '>'
ppSymbol (FSymKS ts) = ppStrs ts
ppSymbol (FSymKP ts alts) = text "pre" <+> braces (hsep (punctuate semi (ppStrs ts : map ppAlt alts)))
ppAlt (Alt ts ps) = ppStrs ts <+> char '/' <+> hsep (map (doubleQuotes . text) ps)
ppStrs ss = doubleQuotes (hsep (map text ss))
ppFCat fcat
| fcat == fcatString = text "CString"
| fcat == fcatInt = text "CInt"
| fcat == fcatFloat = text "CFloat"
| fcat == fcatVar = text "CVar"
| otherwise = char 'C' <> int fcat
ppFunId funid = char 'F' <> int funid
ppSeqId seqid = char 'S' <> int seqid

View File

@@ -56,23 +56,20 @@ parseWithRecovery pgf lang typ open_typs toks = accept (initState pgf lang typ)
-- startup category.
initState :: PGF -> Language -> Type -> ParseState
initState pgf lang (DTyp _ start _) =
let items = case Map.lookup start (startCats pinfo) of
let items = case Map.lookup start (startCats cnc) of
Just (s,e,labels) -> do cat <- range (s,e)
(funid,args) <- foldForest (\funid args -> (:) (funid,args)) (\_ _ args -> args)
[] cat (pproductions pinfo)
let FFun fn lins = functions pinfo ! funid
[] cat (pproductions cnc)
let FFun fn lins = functions cnc ! funid
(lbl,seqid) <- assocs lins
return (Active 0 0 funid seqid args (AK cat lbl))
Nothing -> mzero
pinfo =
case lookParser pgf lang of
Just pinfo -> pinfo
_ -> error ("Unknown language: " ++ showCId lang)
cnc = lookConcr pgf lang
in PState pgf
pinfo
(Chart emptyAC [] emptyPC (pproductions pinfo) (totalCats pinfo) 0)
cnc
(Chart emptyAC [] emptyPC (pproductions cnc) (totalCats cnc) 0)
(TMap.singleton [] (Set.fromList items))
-- | From the current state and the next token
@@ -81,19 +78,19 @@ initState pgf lang (DTyp _ start _) =
-- If the new token cannot be accepted then an error state
-- is returned.
nextState :: ParseState -> String -> Either ErrorState ParseState
nextState (PState pgf pinfo chart items) t =
nextState (PState pgf cnc chart items) t =
let (mb_agenda,map_items) = TMap.decompose items
agenda = maybe [] Set.toList mb_agenda
acc = fromMaybe TMap.empty (Map.lookup t map_items)
(acc1,chart1) = process (Just t) add (sequences pinfo) (functions pinfo) agenda acc chart
(acc1,chart1) = process (Just t) add (sequences cnc) (functions cnc) agenda acc chart
chart2 = chart1{ active =emptyAC
, actives=active chart1 : actives chart1
, passive=emptyPC
, offset =offset chart1+1
}
in if TMap.null acc1
then Left (EState pgf pinfo chart2)
else Right (PState pgf pinfo chart2 acc1)
then Left (EState pgf cnc chart2)
else Right (PState pgf cnc chart2 acc1)
where
add (tok:toks) item acc
| tok == t = TMap.insertWith Set.union toks (Set.singleton item) acc
@@ -104,35 +101,35 @@ nextState (PState pgf pinfo chart items) t =
-- next words and the consequent states. This is used for word completions in
-- the GF interpreter.
getCompletions :: ParseState -> String -> Map.Map String ParseState
getCompletions (PState pgf pinfo chart items) w =
getCompletions (PState pgf cnc chart items) w =
let (mb_agenda,map_items) = TMap.decompose items
agenda = maybe [] Set.toList mb_agenda
acc = Map.filterWithKey (\tok _ -> isPrefixOf w tok) map_items
(acc',chart1) = process Nothing add (sequences pinfo) (functions pinfo) agenda acc chart
(acc',chart1) = process Nothing add (sequences cnc) (functions cnc) agenda acc chart
chart2 = chart1{ active =emptyAC
, actives=active chart1 : actives chart1
, passive=emptyPC
, offset =offset chart1+1
}
in fmap (PState pgf pinfo chart2) acc'
in fmap (PState pgf cnc chart2) acc'
where
add (tok:toks) item acc
| isPrefixOf w tok = Map.insertWith (TMap.unionWith Set.union) tok (TMap.singleton toks (Set.singleton item)) acc
add _ item acc = acc
recoveryStates :: [Type] -> ErrorState -> (ParseState, Map.Map String ParseState)
recoveryStates open_types (EState pgf pinfo chart) =
recoveryStates open_types (EState pgf cnc chart) =
let open_fcats = concatMap type2fcats open_types
agenda = foldl (complete open_fcats) [] (actives chart)
(acc,chart1) = process Nothing add (sequences pinfo) (functions pinfo) agenda Map.empty chart
(acc,chart1) = process Nothing add (sequences cnc) (functions cnc) agenda Map.empty chart
chart2 = chart1{ active =emptyAC
, actives=active chart1 : actives chart1
, passive=emptyPC
, offset =offset chart1+1
}
in (PState pgf pinfo chart (TMap.singleton [] (Set.fromList agenda)), fmap (PState pgf pinfo chart2) acc)
in (PState pgf cnc chart (TMap.singleton [] (Set.fromList agenda)), fmap (PState pgf cnc chart2) acc)
where
type2fcats (DTyp _ cat _) = case Map.lookup cat (startCats pinfo) of
type2fcats (DTyp _ cat _) = case Map.lookup cat (startCats cnc) of
Just (s,e,labels) -> range (s,e)
Nothing -> []
@@ -149,15 +146,15 @@ recoveryStates open_types (EState pgf pinfo chart) =
-- limited by the category specified, which is usually
-- the same as the startup category.
extractTrees :: ParseState -> Type -> [Tree]
extractTrees (PState pgf pinfo chart items) ty@(DTyp _ start _) =
extractTrees (PState pgf cnc chart items) ty@(DTyp _ start _) =
nubsort [e1 | e <- exps, Right e1 <- [checkExpr pgf e ty]]
where
(mb_agenda,acc) = TMap.decompose items
agenda = maybe [] Set.toList mb_agenda
(_,st) = process Nothing (\_ _ -> id) (sequences pinfo) (functions pinfo) agenda () chart
(_,st) = process Nothing (\_ _ -> id) (sequences cnc) (functions cnc) agenda () chart
exps =
case Map.lookup start (startCats pinfo) of
case Map.lookup start (startCats cnc) of
Just (s,e,lbls) -> do cat <- range (s,e)
lbl <- indices lbls
Just fid <- [lookupPC (PK cat lbl 0) (passive st)]
@@ -167,10 +164,10 @@ extractTrees (PState pgf pinfo chart items) ty@(DTyp _ start _) =
Nothing -> mzero
go rec fcat' (d,fcat)
| fcat < totalCats pinfo = return (Set.empty,EMeta (fcat'*10+d)) -- FIXME: here we assume that every rule has at most 10 arguments
| fcat < totalCats cnc = return (Set.empty,EMeta (fcat'*10+d)) -- FIXME: here we assume that every rule has at most 10 arguments
| Set.member fcat rec = mzero
| otherwise = foldForest (\funid args trees ->
do let FFun fn lins = functions pinfo ! funid
do let FFun fn lins = functions cnc ! funid
args <- mapM (go (Set.insert fcat rec) fcat) (zip [0..] args)
check_ho_fun fn args
`mplus`
@@ -348,7 +345,7 @@ foldForest f g b fcat forest =
-- | An abstract data type whose values represent
-- the current state in an incremental parser.
data ParseState = PState PGF ParserInfo Chart (TMap.TrieMap String (Set.Set Active))
data ParseState = PState PGF Concr Chart (TMap.TrieMap String (Set.Set Active))
data Chart
= Chart
@@ -367,4 +364,4 @@ data Chart
-- | An abstract data type whose values represent
-- the state in an incremental parser after an error.
data ErrorState = EState PGF ParserInfo Chart
data ErrorState = EState PGF Concr Chart

View File

@@ -0,0 +1,89 @@
module PGF.Printer (ppPGF,ppCat,ppFun) where
import PGF.CId
import PGF.Data
import PGF.Macros
import GF.Data.Operations
import qualified Data.Map as Map
import qualified Data.Set as Set
import qualified Data.IntMap as IntMap
import Data.List
import Data.Array.IArray
import Data.Array.Unboxed
import Text.PrettyPrint
ppPGF :: PGF -> Doc
ppPGF pgf = ppAbs (absname pgf) (abstract pgf) $$ ppAll ppCnc (concretes pgf)
ppAbs :: Language -> Abstr -> Doc
ppAbs name a = text "abstract" <+> ppCId name <+> char '{' $$
nest 2 (ppAll ppCat (cats a) $$
ppAll ppFun (funs a)) $$
char '}'
ppCat :: CId -> [Hypo] -> Doc
ppCat c hyps = text "cat" <+> ppCId c <+> hsep (snd (mapAccumL ppHypo [] hyps))
ppFun :: CId -> (Type,Int,[Equation]) -> Doc
ppFun f (t,_,eqs) = text "fun" <+> ppCId f <+> colon <+> ppType 0 [] t $$
if null eqs
then empty
else text "def" <+> vcat [let (scope,ds) = mapAccumL (ppPatt 9) [] patts
in ppCId f <+> hsep ds <+> char '=' <+> ppExpr 0 scope res | Equ patts res <- eqs]
ppCnc :: Language -> Concr -> Doc
ppCnc name cnc =
text "concrete" <+> ppCId name <+> char '{' $$
nest 2 (text "productions" $$
nest 2 (vcat [ppProduction (fcat,prod) | (fcat,set) <- IntMap.toList (productions cnc), prod <- Set.toList set]) $$
text "functions" $$
nest 2 (vcat (map ppFFun (assocs (functions cnc)))) $$
text "sequences" $$
nest 2 (vcat (map ppSeq (assocs (sequences cnc)))) $$
text "startcats" $$
nest 2 (vcat (map ppStartCat (Map.toList (startCats cnc))))) $$
char '}'
ppProduction (fcat,FApply funid args) =
ppFCat fcat <+> text "->" <+> ppFunId funid <> brackets (hcat (punctuate comma (map ppFCat args)))
ppProduction (fcat,FCoerce arg) =
ppFCat fcat <+> text "->" <+> char '_' <> brackets (ppFCat arg)
ppProduction (fcat,FConst _ ss) =
ppFCat fcat <+> text "->" <+> ppStrs ss
ppFFun (funid,FFun fun arr) =
ppFunId funid <+> text ":=" <+> parens (hcat (punctuate comma (map ppSeqId (elems arr)))) <+> brackets (ppCId fun)
ppSeq (seqid,seq) =
ppSeqId seqid <+> text ":=" <+> hsep (map ppSymbol (elems seq))
ppStartCat (id,(start,end,labels)) =
ppCId id <+> text ":=" <+> (text "range " <+> brackets (ppFCat start <+> text ".." <+> ppFCat end) $$
text "labels" <+> brackets (vcat (map (text . show) (elems labels))))
ppSymbol (FSymCat d r) = char '<' <> int d <> comma <> int r <> char '>'
ppSymbol (FSymLit d r) = char '<' <> int d <> comma <> int r <> char '>'
ppSymbol (FSymKS ts) = ppStrs ts
ppSymbol (FSymKP ts alts) = text "pre" <+> braces (hsep (punctuate semi (ppStrs ts : map ppAlt alts)))
ppAlt (Alt ts ps) = ppStrs ts <+> char '/' <+> hsep (map (doubleQuotes . text) ps)
ppStrs ss = doubleQuotes (hsep (map text ss))
ppFCat fcat
| fcat == fcatString = text "CString"
| fcat == fcatInt = text "CInt"
| fcat == fcatFloat = text "CFloat"
| fcat == fcatVar = text "CVar"
| otherwise = char 'C' <> int fcat
ppFunId funid = char 'F' <> int funid
ppSeqId seqid = char 'S' <> int seqid
-- Utilities
ppAll :: (a -> b -> Doc) -> Map.Map a b -> Doc
ppAll p m = vcat [ p k v | (k,v) <- Map.toList m]