mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-19 01:39:32 -06:00
PGF is now real synchronous PMCFG
This commit is contained in:
@@ -54,7 +54,7 @@ module PGF(
|
||||
showPrintName,
|
||||
|
||||
-- ** Parsing
|
||||
parse, parseWithRecovery, canParse, parseAllLang, parseAll,
|
||||
parse, parseWithRecovery, parseAllLang, parseAll,
|
||||
|
||||
-- ** Evaluation
|
||||
PGF.compute, paraphrase,
|
||||
@@ -106,9 +106,7 @@ import PGF.Morphology
|
||||
import PGF.Data hiding (functions)
|
||||
import PGF.Binary
|
||||
import qualified PGF.Parse as Parse
|
||||
import qualified GF.Compile.GeneratePMCFG as PMCFG
|
||||
|
||||
import GF.Infra.Option
|
||||
import GF.Data.Utilities (replace)
|
||||
|
||||
import Data.Char
|
||||
@@ -144,9 +142,6 @@ parse :: PGF -> Language -> Type -> String -> [Tree]
|
||||
|
||||
parseWithRecovery :: PGF -> Language -> Type -> [Type] -> String -> [Tree]
|
||||
|
||||
-- | Checks whether the given language can be used for parsing.
|
||||
canParse :: PGF -> Language -> Bool
|
||||
|
||||
-- | The same as 'linearizeAllLang' but does not return
|
||||
-- the language.
|
||||
linearizeAll :: PGF -> Tree -> [String]
|
||||
@@ -228,31 +223,17 @@ complete :: PGF -> Language -> Type -> String
|
||||
-- Implementation
|
||||
---------------------------------------------------
|
||||
|
||||
readPGF f = decodeFile f >>= addParsers
|
||||
|
||||
-- Adds parsers for all concretes that don't have a parser and that have parser=ondemand.
|
||||
addParsers :: PGF -> IO PGF
|
||||
addParsers pgf = do cncs <- sequence [if wantsParser cnc then addParser lang cnc else return (lang,cnc)
|
||||
| (lang,cnc) <- Map.toList (concretes pgf)]
|
||||
return pgf { concretes = Map.fromList cncs }
|
||||
where
|
||||
wantsParser cnc = isNothing (parser cnc) && Map.lookup (mkCId "parser") (cflags cnc) == Just "ondemand"
|
||||
addParser lang cnc = do pinfo <- PMCFG.convertConcrete noOptions (abstract pgf) lang cnc
|
||||
return (lang,cnc { parser = Just pinfo })
|
||||
readPGF f = decodeFile f
|
||||
|
||||
linearize pgf lang = concat . take 1 . PGF.Linearize.linearizes pgf lang
|
||||
|
||||
parse pgf lang typ s =
|
||||
case Map.lookup lang (concretes pgf) of
|
||||
Just cnc -> case parser cnc of
|
||||
Just pinfo -> Parse.parse pgf lang typ (words s)
|
||||
Nothing -> error ("No parser built for language: " ++ showCId lang)
|
||||
Just cnc -> Parse.parse pgf lang typ (words s)
|
||||
Nothing -> error ("Unknown language: " ++ showCId lang)
|
||||
|
||||
parseWithRecovery pgf lang typ open_typs s = Parse.parseWithRecovery pgf lang typ open_typs (words s)
|
||||
|
||||
canParse pgf cnc = isJust (lookParser pgf cnc)
|
||||
|
||||
linearizeAll mgr = map snd . linearizeAllLang mgr
|
||||
linearizeAllLang mgr t =
|
||||
[(lang,PGF.linearize mgr lang t) | lang <- languages mgr]
|
||||
@@ -260,7 +241,7 @@ linearizeAllLang mgr t =
|
||||
parseAll mgr typ = map snd . parseAllLang mgr typ
|
||||
|
||||
parseAllLang mgr typ s =
|
||||
[(lang,ts) | lang <- languages mgr, canParse mgr lang, let ts = parse mgr lang typ s, not (null ts)]
|
||||
[(lang,ts) | lang <- languages mgr, let ts = parse mgr lang typ s, not (null ts)]
|
||||
|
||||
generateRandom pgf cat = do
|
||||
gen <- newStdGen
|
||||
|
||||
@@ -51,24 +51,24 @@ instance Binary Abstr where
|
||||
})
|
||||
|
||||
instance Binary Concr where
|
||||
put cnc = put ( cflags cnc, lins cnc, opers cnc
|
||||
, lincats cnc, lindefs cnc
|
||||
, printnames cnc, paramlincats cnc
|
||||
, parser cnc
|
||||
put cnc = put ( cflags cnc, printnames cnc
|
||||
, functions cnc, sequences cnc
|
||||
, productions cnc
|
||||
, totalCats cnc, startCats cnc
|
||||
)
|
||||
get = do cflags <- get
|
||||
lins <- get
|
||||
opers <- get
|
||||
lincats <- get
|
||||
lindefs <- get
|
||||
printnames <- get
|
||||
paramlincats <- get
|
||||
parser <- get
|
||||
return (Concr{ cflags=cflags, lins=lins, opers=opers
|
||||
, lincats=lincats, lindefs=lindefs
|
||||
, printnames=printnames
|
||||
, paramlincats=paramlincats
|
||||
, parser=parser
|
||||
get = do cflags <- get
|
||||
printnames <- get
|
||||
functions <- get
|
||||
sequences <- get
|
||||
productions <- get
|
||||
totalCats <- get
|
||||
startCats <- get
|
||||
return (Concr{ cflags=cflags, printnames=printnames
|
||||
, functions=functions,sequences=sequences
|
||||
, productions = productions
|
||||
, pproductions = IntMap.empty
|
||||
, lproductions = Map.empty
|
||||
, totalCats=totalCats,startCats=startCats
|
||||
})
|
||||
|
||||
instance Binary Alternative where
|
||||
@@ -186,17 +186,4 @@ instance Binary Production where
|
||||
1 -> liftM FCoerce get
|
||||
_ -> decodingError
|
||||
|
||||
instance Binary ParserInfo where
|
||||
put p = put (functions p, sequences p, productions p, totalCats p, startCats p)
|
||||
get = do functions <- get
|
||||
sequences <- get
|
||||
productions <- get
|
||||
totalCats <- get
|
||||
startCats <- get
|
||||
return (ParserInfo{functions=functions,sequences=sequences
|
||||
,productions = productions
|
||||
,pproductions = IntMap.empty
|
||||
,lproductions = Map.empty
|
||||
,totalCats=totalCats,startCats=startCats})
|
||||
|
||||
decodingError = fail "This PGF file was compiled with different version of GF"
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
module PGF.Check (checkPGF) where
|
||||
module PGF.Check (checkPGF,checkLin) where
|
||||
|
||||
import PGF.CId
|
||||
import PGF.Data
|
||||
@@ -7,14 +7,15 @@ import GF.Data.ErrM
|
||||
|
||||
import qualified Data.Map as Map
|
||||
import Control.Monad
|
||||
import Data.Maybe(fromMaybe)
|
||||
import Debug.Trace
|
||||
|
||||
checkPGF :: PGF -> Err (PGF,Bool)
|
||||
checkPGF pgf = do
|
||||
checkPGF pgf = return (pgf,True) {- do
|
||||
(cs,bs) <- mapM (checkConcrete pgf)
|
||||
(Map.assocs (concretes pgf)) >>= return . unzip
|
||||
return (pgf {concretes = Map.fromAscList cs}, and bs)
|
||||
|
||||
-}
|
||||
|
||||
-- errors are non-fatal; replace with 'fail' to change this
|
||||
msg s = trace s (return ())
|
||||
@@ -27,7 +28,7 @@ labelBoolErr ms iob = do
|
||||
(x,b) <- iob
|
||||
if b then return (x,b) else (msg ms >> return (x,b))
|
||||
|
||||
|
||||
{-
|
||||
checkConcrete :: PGF -> (CId,Concr) -> Err ((CId,Concr),Bool)
|
||||
checkConcrete pgf (lang,cnc) =
|
||||
labelBoolErr ("happened in language " ++ showCId lang) $ do
|
||||
@@ -35,8 +36,11 @@ checkConcrete pgf (lang,cnc) =
|
||||
return ((lang,cnc{lins = Map.fromAscList rs}),and bs)
|
||||
where
|
||||
checkl = checkLin pgf lang
|
||||
-}
|
||||
|
||||
checkLin :: PGF -> CId -> (CId,Term) -> Err ((CId,Term),Bool)
|
||||
type PGFSig = (Map.Map CId (Type,Int,[Equation]),Map.Map CId Term,Map.Map CId Term)
|
||||
|
||||
checkLin :: PGFSig -> CId -> (CId,Term) -> Err ((CId,Term),Bool)
|
||||
checkLin pgf lang (f,t) =
|
||||
labelBoolErr ("happened in function " ++ showCId f) $ do
|
||||
(t',b) <- checkTerm (lintype pgf lang f) t --- $ inline pgf lang t
|
||||
@@ -124,8 +128,8 @@ ints = C
|
||||
str :: CType
|
||||
str = S []
|
||||
|
||||
lintype :: PGF -> CId -> CId -> LinType
|
||||
lintype pgf lang fun = case typeSkeleton (lookType pgf fun) of
|
||||
lintype :: PGFSig -> CId -> CId -> LinType
|
||||
lintype pgf lang fun = case typeSkeleton (lookFun pgf fun) of
|
||||
(cs,c) -> (map vlinc cs, linc c) ---- HOAS
|
||||
where
|
||||
linc = lookLincat pgf lang
|
||||
@@ -133,7 +137,7 @@ lintype pgf lang fun = case typeSkeleton (lookType pgf fun) of
|
||||
vlinc (i,c) = case linc c of
|
||||
R ts -> R (ts ++ replicate i str)
|
||||
|
||||
inline :: PGF -> CId -> Term -> Term
|
||||
inline :: PGFSig -> CId -> Term -> Term
|
||||
inline pgf lang t = case t of
|
||||
F c -> inl $ look c
|
||||
_ -> composSafeOp inl t
|
||||
@@ -171,3 +175,7 @@ err :: (String -> b) -> (a -> b) -> Err a -> b
|
||||
err d f e = case e of
|
||||
Ok a -> f a
|
||||
Bad s -> d s
|
||||
|
||||
lookFun (abs,lin,lincats) f = (\(a,b,c) -> a) $ fromMaybe (error "No abs") (Map.lookup f abs)
|
||||
lookLincat (abs,lin,lincats) _ c = fromMaybe (error "No lincat") (Map.lookup c lincats)
|
||||
lookLin (abs,lin,lincats) _ f = fromMaybe (error "No lin") (Map.lookup f lin)
|
||||
|
||||
@@ -1,15 +1,17 @@
|
||||
module PGF.Data (module PGF.Data, module PGF.Expr, module PGF.Type, module PGF.PMCFG) where
|
||||
module PGF.Data (module PGF.Data, module PGF.Expr, module PGF.Type) where
|
||||
|
||||
import PGF.CId
|
||||
import PGF.Expr hiding (Value, Env, Tree)
|
||||
import PGF.Type
|
||||
import PGF.PMCFG
|
||||
|
||||
import qualified Data.Map as Map
|
||||
import qualified Data.Set as Set
|
||||
import qualified Data.IntMap as IntMap
|
||||
import Data.Array.IArray
|
||||
import Data.Array.Unboxed
|
||||
import Data.List
|
||||
|
||||
|
||||
-- internal datatypes for PGF
|
||||
|
||||
-- | An abstract data type representing multilingual grammar
|
||||
@@ -30,16 +32,40 @@ data Abstr = Abstr {
|
||||
}
|
||||
|
||||
data Concr = Concr {
|
||||
cflags :: Map.Map CId String, -- value of a flag
|
||||
lins :: Map.Map CId Term, -- lin of a fun
|
||||
opers :: Map.Map CId Term, -- oper generated by subex elim
|
||||
lincats :: Map.Map CId Term, -- lin type of a cat
|
||||
lindefs :: Map.Map CId Term, -- lin default of a cat
|
||||
printnames :: Map.Map CId String, -- printname of a cat or a fun
|
||||
paramlincats :: Map.Map CId Term, -- lin type of cat, with printable param names
|
||||
parser :: Maybe ParserInfo -- parser
|
||||
cflags :: Map.Map CId String, -- value of a flag
|
||||
printnames :: Map.Map CId String, -- printname of a cat or a fun
|
||||
functions :: Array FunId FFun,
|
||||
sequences :: Array SeqId FSeq,
|
||||
productions :: IntMap.IntMap (Set.Set Production), -- the original productions loaded from the PGF file
|
||||
pproductions :: IntMap.IntMap (Set.Set Production), -- productions needed for parsing
|
||||
lproductions :: Map.Map CId (IntMap.IntMap (Set.Set Production)), -- productions needed for linearization
|
||||
startCats :: Map.Map CId (FCat,FCat,Array FIndex String), -- for every category - start/end FCat and a list of label names
|
||||
totalCats :: {-# UNPACK #-} !FCat
|
||||
}
|
||||
|
||||
type FCat = Int
|
||||
type FIndex = Int
|
||||
type FPointPos = Int
|
||||
data FSymbol
|
||||
= FSymCat {-# UNPACK #-} !Int {-# UNPACK #-} !FIndex
|
||||
| FSymLit {-# UNPACK #-} !Int {-# UNPACK #-} !FIndex
|
||||
| FSymKS [String]
|
||||
| FSymKP [String] [Alternative]
|
||||
deriving (Eq,Ord,Show)
|
||||
data Production
|
||||
= FApply {-# UNPACK #-} !FunId [FCat]
|
||||
| FCoerce {-# UNPACK #-} !FCat
|
||||
| FConst Expr [String]
|
||||
deriving (Eq,Ord,Show)
|
||||
data FFun = FFun CId {-# UNPACK #-} !(UArray FIndex SeqId) deriving (Eq,Ord,Show)
|
||||
type FSeq = Array FPointPos FSymbol
|
||||
type FunId = Int
|
||||
type SeqId = Int
|
||||
|
||||
data Alternative =
|
||||
Alt [String] [String]
|
||||
deriving (Eq,Ord,Show)
|
||||
|
||||
data Term =
|
||||
R [Term]
|
||||
| P Term Term
|
||||
@@ -59,7 +85,7 @@ data Tokn =
|
||||
deriving (Eq,Ord,Show)
|
||||
|
||||
|
||||
-- merge two GFCCs; fails is differens absnames; priority to second arg
|
||||
-- merge two PGFs; fails is differens absnames; priority to second arg
|
||||
|
||||
unionPGF :: PGF -> PGF -> PGF
|
||||
unionPGF one two = case absname one of
|
||||
@@ -93,3 +119,12 @@ readLanguage = readCId
|
||||
|
||||
showLanguage :: Language -> String
|
||||
showLanguage = showCId
|
||||
|
||||
fcatString, fcatInt, fcatFloat, fcatVar :: Int
|
||||
fcatString = (-1)
|
||||
fcatInt = (-2)
|
||||
fcatFloat = (-3)
|
||||
fcatVar = (-4)
|
||||
|
||||
isLiteralFCat :: FCat -> Bool
|
||||
isLiteralFCat = (`elem` [fcatString, fcatInt, fcatFloat, fcatVar])
|
||||
|
||||
@@ -3,7 +3,6 @@ module PGF.Linearize(linearizes,markLinearizes,tabularLinearizes) where
|
||||
import PGF.CId
|
||||
import PGF.Data
|
||||
import PGF.Macros
|
||||
import Data.Maybe (fromJust)
|
||||
import Data.Array.IArray
|
||||
import Data.List
|
||||
import Control.Monad
|
||||
@@ -22,8 +21,7 @@ linTree :: PGF -> Language -> (Maybe CId -> [Int] -> LinTable -> LinTable) -> Ex
|
||||
linTree pgf lang mark e = lin0 [] [] [] Nothing e
|
||||
where
|
||||
cnc = lookMap (error "no lang") lang (concretes pgf)
|
||||
pinfo = fromJust (parser cnc)
|
||||
lp = lproductions pinfo
|
||||
lp = lproductions cnc
|
||||
|
||||
lin0 path xs ys mb_fid (EAbs _ x e) = lin0 path (showCId x:xs) ys mb_fid e
|
||||
lin0 path xs ys mb_fid (ETyped e _) = lin0 path xs ys mb_fid e
|
||||
@@ -50,7 +48,7 @@ linTree pgf lang mark e = lin0 [] [] [] Nothing e
|
||||
case prod of
|
||||
FApply funid fids -> do guard (length fids == length es)
|
||||
args <- sequence (zipWith3 (\i fid e -> lin0 (sub i path) [] xs (Just fid) e) [0..] fids es)
|
||||
let (FFun _ lins) = functions pinfo ! funid
|
||||
let (FFun _ lins) = functions cnc ! funid
|
||||
return (listArray (bounds lins) [computeSeq seqid args | seqid <- elems lins])
|
||||
FCoerce fid -> apply path xs (Just fid) f es
|
||||
Nothing -> mzero
|
||||
@@ -70,7 +68,7 @@ linTree pgf lang mark e = lin0 [] [] [] Nothing e
|
||||
|
||||
computeSeq seqid args = concatMap compute (elems seq)
|
||||
where
|
||||
seq = sequences pinfo ! seqid
|
||||
seq = sequences cnc ! seqid
|
||||
|
||||
compute (FSymCat d r) = (args !! d) ! r
|
||||
compute (FSymLit d r) = (args !! d) ! r
|
||||
@@ -94,7 +92,7 @@ tabularLinearizes pgf lang e = map (zip lbls . map (unwords . untokn) . elems) (
|
||||
where
|
||||
lbls = case unApp e of
|
||||
Just (f,_) -> let cat = valCat (lookType pgf f)
|
||||
in case parser (lookConcr pgf lang) >>= Map.lookup cat . startCats of
|
||||
in case Map.lookup cat (startCats (lookConcr pgf lang)) of
|
||||
Just (_,_,lbls) -> elems lbls
|
||||
Nothing -> error "No labels"
|
||||
Nothing -> error "Not function application"
|
||||
|
||||
@@ -17,22 +17,6 @@ import GF.Data.Utilities(sortNub)
|
||||
mapConcretes :: (Concr -> Concr) -> PGF -> PGF
|
||||
mapConcretes f pgf = pgf { concretes = Map.map f (concretes pgf) }
|
||||
|
||||
lookLin :: PGF -> CId -> CId -> Term
|
||||
lookLin pgf lang fun =
|
||||
lookMap tm0 fun $ lins $ lookMap (error "no lang") lang $ concretes pgf
|
||||
|
||||
lookOper :: PGF -> CId -> CId -> Term
|
||||
lookOper pgf lang fun =
|
||||
lookMap tm0 fun $ opers $ lookMap (error "no lang") lang $ concretes pgf
|
||||
|
||||
lookLincat :: PGF -> CId -> CId -> Term
|
||||
lookLincat pgf lang fun =
|
||||
lookMap tm0 fun $ lincats $ lookMap (error "no lang") lang $ concretes pgf
|
||||
|
||||
lookParamLincat :: PGF -> CId -> CId -> Term
|
||||
lookParamLincat pgf lang fun =
|
||||
lookMap tm0 fun $ paramlincats $ lookMap (error "no lang") lang $ concretes pgf
|
||||
|
||||
lookType :: PGF -> CId -> Type
|
||||
lookType pgf f =
|
||||
case lookMap (error $ "lookType " ++ show f) f (funs (abstract pgf)) of
|
||||
@@ -52,9 +36,6 @@ isData pgf f =
|
||||
lookValCat :: PGF -> CId -> CId
|
||||
lookValCat pgf = valCat . lookType pgf
|
||||
|
||||
lookParser :: PGF -> CId -> Maybe ParserInfo
|
||||
lookParser pgf lang = Map.lookup lang (concretes pgf) >>= parser
|
||||
|
||||
lookStartCat :: PGF -> CId
|
||||
lookStartCat pgf = mkCId $ fromMaybe "S" $ msum $ Data.List.map (Map.lookup (mkCId "startcat"))
|
||||
[gflags pgf, aflags (abstract pgf)]
|
||||
@@ -86,7 +67,7 @@ missingLins pgf lang = [c | c <- fs, not (hasl c)] where
|
||||
hasl = hasLin pgf lang
|
||||
|
||||
hasLin :: PGF -> CId -> CId -> Bool
|
||||
hasLin pgf lang f = Map.member f $ lins $ lookConcr pgf lang
|
||||
hasLin pgf lang f = Map.member f $ lproductions $ lookConcr pgf lang
|
||||
|
||||
restrictPGF :: (CId -> Bool) -> PGF -> PGF
|
||||
restrictPGF cond pgf = pgf {
|
||||
@@ -164,13 +145,11 @@ updateProductionIndices :: PGF -> PGF
|
||||
updateProductionIndices pgf = pgf{concretes = fmap updateConcrete (concretes pgf)}
|
||||
where
|
||||
updateConcrete cnc =
|
||||
case parser cnc of
|
||||
Nothing -> cnc
|
||||
Just pinfo -> let prods0 = filterProductions (productions pinfo)
|
||||
p_prods = parseIndex pinfo prods0
|
||||
l_prods = linIndex pinfo prods0
|
||||
in cnc{parser = Just pinfo{pproductions = p_prods, lproductions = l_prods}}
|
||||
|
||||
let prods0 = filterProductions (productions cnc)
|
||||
p_prods = parseIndex cnc prods0
|
||||
l_prods = linIndex cnc prods0
|
||||
in cnc{pproductions = p_prods, lproductions = l_prods}
|
||||
|
||||
filterProductions prods0
|
||||
| IntMap.size prods == IntMap.size prods0 = prods
|
||||
| otherwise = filterProductions prods
|
||||
|
||||
@@ -20,7 +20,7 @@ newtype Morpho = Morpho (Map.Map String [(Lemma,Analysis)])
|
||||
|
||||
buildMorpho :: PGF -> Language -> Morpho
|
||||
buildMorpho pgf lang = Morpho $
|
||||
case Map.lookup lang (concretes pgf) >>= parser of
|
||||
case Map.lookup lang (concretes pgf) of
|
||||
Just pinfo -> collectWords pinfo
|
||||
Nothing -> Map.empty
|
||||
|
||||
|
||||
@@ -1,101 +0,0 @@
|
||||
module PGF.PMCFG where
|
||||
|
||||
import PGF.CId
|
||||
import PGF.Expr
|
||||
|
||||
import qualified Data.Map as Map
|
||||
import qualified Data.Set as Set
|
||||
import qualified Data.IntMap as IntMap
|
||||
import Data.Array.IArray
|
||||
import Data.Array.Unboxed
|
||||
import Text.PrettyPrint
|
||||
|
||||
type FCat = Int
|
||||
type FIndex = Int
|
||||
type FPointPos = Int
|
||||
data FSymbol
|
||||
= FSymCat {-# UNPACK #-} !Int {-# UNPACK #-} !FIndex
|
||||
| FSymLit {-# UNPACK #-} !Int {-# UNPACK #-} !FIndex
|
||||
| FSymKS [String]
|
||||
| FSymKP [String] [Alternative]
|
||||
deriving (Eq,Ord,Show)
|
||||
data Production
|
||||
= FApply {-# UNPACK #-} !FunId [FCat]
|
||||
| FCoerce {-# UNPACK #-} !FCat
|
||||
| FConst Expr [String]
|
||||
deriving (Eq,Ord,Show)
|
||||
data FFun = FFun CId {-# UNPACK #-} !(UArray FIndex SeqId) deriving (Eq,Ord,Show)
|
||||
type FSeq = Array FPointPos FSymbol
|
||||
type FunId = Int
|
||||
type SeqId = Int
|
||||
|
||||
data Alternative =
|
||||
Alt [String] [String]
|
||||
deriving (Eq,Ord,Show)
|
||||
|
||||
data ParserInfo
|
||||
= ParserInfo { functions :: Array FunId FFun
|
||||
, sequences :: Array SeqId FSeq
|
||||
, productions :: IntMap.IntMap (Set.Set Production) -- the original productions loaded from the PGF file
|
||||
, pproductions :: IntMap.IntMap (Set.Set Production) -- productions needed for parsing
|
||||
, lproductions :: Map.Map CId (IntMap.IntMap (Set.Set Production)) -- productions needed for linearization
|
||||
, startCats :: Map.Map CId (FCat,FCat,Array FIndex String) -- for every category - start/end FCat and a list of label names
|
||||
, totalCats :: {-# UNPACK #-} !FCat
|
||||
}
|
||||
|
||||
|
||||
fcatString, fcatInt, fcatFloat, fcatVar :: Int
|
||||
fcatString = (-1)
|
||||
fcatInt = (-2)
|
||||
fcatFloat = (-3)
|
||||
fcatVar = (-4)
|
||||
|
||||
isLiteralFCat :: FCat -> Bool
|
||||
isLiteralFCat = (`elem` [fcatString, fcatInt, fcatFloat, fcatVar])
|
||||
|
||||
ppPMCFG :: ParserInfo -> Doc
|
||||
ppPMCFG pinfo =
|
||||
text "productions" $$
|
||||
nest 2 (vcat [ppProduction (fcat,prod) | (fcat,set) <- IntMap.toList (productions pinfo), prod <- Set.toList set]) $$
|
||||
text "functions" $$
|
||||
nest 2 (vcat (map ppFun (assocs (functions pinfo)))) $$
|
||||
text "sequences" $$
|
||||
nest 2 (vcat (map ppSeq (assocs (sequences pinfo)))) $$
|
||||
text "startcats" $$
|
||||
nest 2 (vcat (map ppStartCat (Map.toList (startCats pinfo))))
|
||||
|
||||
ppProduction (fcat,FApply funid args) =
|
||||
ppFCat fcat <+> text "->" <+> ppFunId funid <> brackets (hcat (punctuate comma (map ppFCat args)))
|
||||
ppProduction (fcat,FCoerce arg) =
|
||||
ppFCat fcat <+> text "->" <+> char '_' <> brackets (ppFCat arg)
|
||||
ppProduction (fcat,FConst _ ss) =
|
||||
ppFCat fcat <+> text "->" <+> ppStrs ss
|
||||
|
||||
ppFun (funid,FFun fun arr) =
|
||||
ppFunId funid <+> text ":=" <+> parens (hcat (punctuate comma (map ppSeqId (elems arr)))) <+> brackets (ppCId fun)
|
||||
|
||||
ppSeq (seqid,seq) =
|
||||
ppSeqId seqid <+> text ":=" <+> hsep (map ppSymbol (elems seq))
|
||||
|
||||
ppStartCat (id,(start,end,labels)) =
|
||||
ppCId id <+> text ":=" <+> (text "range " <+> brackets (ppFCat start <+> text ".." <+> ppFCat end) $$
|
||||
text "labels" <+> brackets (vcat (map (text . show) (elems labels))))
|
||||
|
||||
ppSymbol (FSymCat d r) = char '<' <> int d <> comma <> int r <> char '>'
|
||||
ppSymbol (FSymLit d r) = char '<' <> int d <> comma <> int r <> char '>'
|
||||
ppSymbol (FSymKS ts) = ppStrs ts
|
||||
ppSymbol (FSymKP ts alts) = text "pre" <+> braces (hsep (punctuate semi (ppStrs ts : map ppAlt alts)))
|
||||
|
||||
ppAlt (Alt ts ps) = ppStrs ts <+> char '/' <+> hsep (map (doubleQuotes . text) ps)
|
||||
|
||||
ppStrs ss = doubleQuotes (hsep (map text ss))
|
||||
|
||||
ppFCat fcat
|
||||
| fcat == fcatString = text "CString"
|
||||
| fcat == fcatInt = text "CInt"
|
||||
| fcat == fcatFloat = text "CFloat"
|
||||
| fcat == fcatVar = text "CVar"
|
||||
| otherwise = char 'C' <> int fcat
|
||||
|
||||
ppFunId funid = char 'F' <> int funid
|
||||
ppSeqId seqid = char 'S' <> int seqid
|
||||
@@ -56,23 +56,20 @@ parseWithRecovery pgf lang typ open_typs toks = accept (initState pgf lang typ)
|
||||
-- startup category.
|
||||
initState :: PGF -> Language -> Type -> ParseState
|
||||
initState pgf lang (DTyp _ start _) =
|
||||
let items = case Map.lookup start (startCats pinfo) of
|
||||
let items = case Map.lookup start (startCats cnc) of
|
||||
Just (s,e,labels) -> do cat <- range (s,e)
|
||||
(funid,args) <- foldForest (\funid args -> (:) (funid,args)) (\_ _ args -> args)
|
||||
[] cat (pproductions pinfo)
|
||||
let FFun fn lins = functions pinfo ! funid
|
||||
[] cat (pproductions cnc)
|
||||
let FFun fn lins = functions cnc ! funid
|
||||
(lbl,seqid) <- assocs lins
|
||||
return (Active 0 0 funid seqid args (AK cat lbl))
|
||||
Nothing -> mzero
|
||||
|
||||
pinfo =
|
||||
case lookParser pgf lang of
|
||||
Just pinfo -> pinfo
|
||||
_ -> error ("Unknown language: " ++ showCId lang)
|
||||
cnc = lookConcr pgf lang
|
||||
|
||||
in PState pgf
|
||||
pinfo
|
||||
(Chart emptyAC [] emptyPC (pproductions pinfo) (totalCats pinfo) 0)
|
||||
cnc
|
||||
(Chart emptyAC [] emptyPC (pproductions cnc) (totalCats cnc) 0)
|
||||
(TMap.singleton [] (Set.fromList items))
|
||||
|
||||
-- | From the current state and the next token
|
||||
@@ -81,19 +78,19 @@ initState pgf lang (DTyp _ start _) =
|
||||
-- If the new token cannot be accepted then an error state
|
||||
-- is returned.
|
||||
nextState :: ParseState -> String -> Either ErrorState ParseState
|
||||
nextState (PState pgf pinfo chart items) t =
|
||||
nextState (PState pgf cnc chart items) t =
|
||||
let (mb_agenda,map_items) = TMap.decompose items
|
||||
agenda = maybe [] Set.toList mb_agenda
|
||||
acc = fromMaybe TMap.empty (Map.lookup t map_items)
|
||||
(acc1,chart1) = process (Just t) add (sequences pinfo) (functions pinfo) agenda acc chart
|
||||
(acc1,chart1) = process (Just t) add (sequences cnc) (functions cnc) agenda acc chart
|
||||
chart2 = chart1{ active =emptyAC
|
||||
, actives=active chart1 : actives chart1
|
||||
, passive=emptyPC
|
||||
, offset =offset chart1+1
|
||||
}
|
||||
in if TMap.null acc1
|
||||
then Left (EState pgf pinfo chart2)
|
||||
else Right (PState pgf pinfo chart2 acc1)
|
||||
then Left (EState pgf cnc chart2)
|
||||
else Right (PState pgf cnc chart2 acc1)
|
||||
where
|
||||
add (tok:toks) item acc
|
||||
| tok == t = TMap.insertWith Set.union toks (Set.singleton item) acc
|
||||
@@ -104,35 +101,35 @@ nextState (PState pgf pinfo chart items) t =
|
||||
-- next words and the consequent states. This is used for word completions in
|
||||
-- the GF interpreter.
|
||||
getCompletions :: ParseState -> String -> Map.Map String ParseState
|
||||
getCompletions (PState pgf pinfo chart items) w =
|
||||
getCompletions (PState pgf cnc chart items) w =
|
||||
let (mb_agenda,map_items) = TMap.decompose items
|
||||
agenda = maybe [] Set.toList mb_agenda
|
||||
acc = Map.filterWithKey (\tok _ -> isPrefixOf w tok) map_items
|
||||
(acc',chart1) = process Nothing add (sequences pinfo) (functions pinfo) agenda acc chart
|
||||
(acc',chart1) = process Nothing add (sequences cnc) (functions cnc) agenda acc chart
|
||||
chart2 = chart1{ active =emptyAC
|
||||
, actives=active chart1 : actives chart1
|
||||
, passive=emptyPC
|
||||
, offset =offset chart1+1
|
||||
}
|
||||
in fmap (PState pgf pinfo chart2) acc'
|
||||
in fmap (PState pgf cnc chart2) acc'
|
||||
where
|
||||
add (tok:toks) item acc
|
||||
| isPrefixOf w tok = Map.insertWith (TMap.unionWith Set.union) tok (TMap.singleton toks (Set.singleton item)) acc
|
||||
add _ item acc = acc
|
||||
|
||||
recoveryStates :: [Type] -> ErrorState -> (ParseState, Map.Map String ParseState)
|
||||
recoveryStates open_types (EState pgf pinfo chart) =
|
||||
recoveryStates open_types (EState pgf cnc chart) =
|
||||
let open_fcats = concatMap type2fcats open_types
|
||||
agenda = foldl (complete open_fcats) [] (actives chart)
|
||||
(acc,chart1) = process Nothing add (sequences pinfo) (functions pinfo) agenda Map.empty chart
|
||||
(acc,chart1) = process Nothing add (sequences cnc) (functions cnc) agenda Map.empty chart
|
||||
chart2 = chart1{ active =emptyAC
|
||||
, actives=active chart1 : actives chart1
|
||||
, passive=emptyPC
|
||||
, offset =offset chart1+1
|
||||
}
|
||||
in (PState pgf pinfo chart (TMap.singleton [] (Set.fromList agenda)), fmap (PState pgf pinfo chart2) acc)
|
||||
in (PState pgf cnc chart (TMap.singleton [] (Set.fromList agenda)), fmap (PState pgf cnc chart2) acc)
|
||||
where
|
||||
type2fcats (DTyp _ cat _) = case Map.lookup cat (startCats pinfo) of
|
||||
type2fcats (DTyp _ cat _) = case Map.lookup cat (startCats cnc) of
|
||||
Just (s,e,labels) -> range (s,e)
|
||||
Nothing -> []
|
||||
|
||||
@@ -149,15 +146,15 @@ recoveryStates open_types (EState pgf pinfo chart) =
|
||||
-- limited by the category specified, which is usually
|
||||
-- the same as the startup category.
|
||||
extractTrees :: ParseState -> Type -> [Tree]
|
||||
extractTrees (PState pgf pinfo chart items) ty@(DTyp _ start _) =
|
||||
extractTrees (PState pgf cnc chart items) ty@(DTyp _ start _) =
|
||||
nubsort [e1 | e <- exps, Right e1 <- [checkExpr pgf e ty]]
|
||||
where
|
||||
(mb_agenda,acc) = TMap.decompose items
|
||||
agenda = maybe [] Set.toList mb_agenda
|
||||
(_,st) = process Nothing (\_ _ -> id) (sequences pinfo) (functions pinfo) agenda () chart
|
||||
(_,st) = process Nothing (\_ _ -> id) (sequences cnc) (functions cnc) agenda () chart
|
||||
|
||||
exps =
|
||||
case Map.lookup start (startCats pinfo) of
|
||||
case Map.lookup start (startCats cnc) of
|
||||
Just (s,e,lbls) -> do cat <- range (s,e)
|
||||
lbl <- indices lbls
|
||||
Just fid <- [lookupPC (PK cat lbl 0) (passive st)]
|
||||
@@ -167,10 +164,10 @@ extractTrees (PState pgf pinfo chart items) ty@(DTyp _ start _) =
|
||||
Nothing -> mzero
|
||||
|
||||
go rec fcat' (d,fcat)
|
||||
| fcat < totalCats pinfo = return (Set.empty,EMeta (fcat'*10+d)) -- FIXME: here we assume that every rule has at most 10 arguments
|
||||
| fcat < totalCats cnc = return (Set.empty,EMeta (fcat'*10+d)) -- FIXME: here we assume that every rule has at most 10 arguments
|
||||
| Set.member fcat rec = mzero
|
||||
| otherwise = foldForest (\funid args trees ->
|
||||
do let FFun fn lins = functions pinfo ! funid
|
||||
do let FFun fn lins = functions cnc ! funid
|
||||
args <- mapM (go (Set.insert fcat rec) fcat) (zip [0..] args)
|
||||
check_ho_fun fn args
|
||||
`mplus`
|
||||
@@ -348,7 +345,7 @@ foldForest f g b fcat forest =
|
||||
|
||||
-- | An abstract data type whose values represent
|
||||
-- the current state in an incremental parser.
|
||||
data ParseState = PState PGF ParserInfo Chart (TMap.TrieMap String (Set.Set Active))
|
||||
data ParseState = PState PGF Concr Chart (TMap.TrieMap String (Set.Set Active))
|
||||
|
||||
data Chart
|
||||
= Chart
|
||||
@@ -367,4 +364,4 @@ data Chart
|
||||
|
||||
-- | An abstract data type whose values represent
|
||||
-- the state in an incremental parser after an error.
|
||||
data ErrorState = EState PGF ParserInfo Chart
|
||||
data ErrorState = EState PGF Concr Chart
|
||||
|
||||
89
src/runtime/haskell/PGF/Printer.hs
Normal file
89
src/runtime/haskell/PGF/Printer.hs
Normal file
@@ -0,0 +1,89 @@
|
||||
module PGF.Printer (ppPGF,ppCat,ppFun) where
|
||||
|
||||
import PGF.CId
|
||||
import PGF.Data
|
||||
import PGF.Macros
|
||||
|
||||
import GF.Data.Operations
|
||||
|
||||
import qualified Data.Map as Map
|
||||
import qualified Data.Set as Set
|
||||
import qualified Data.IntMap as IntMap
|
||||
import Data.List
|
||||
import Data.Array.IArray
|
||||
import Data.Array.Unboxed
|
||||
import Text.PrettyPrint
|
||||
|
||||
|
||||
ppPGF :: PGF -> Doc
|
||||
ppPGF pgf = ppAbs (absname pgf) (abstract pgf) $$ ppAll ppCnc (concretes pgf)
|
||||
|
||||
ppAbs :: Language -> Abstr -> Doc
|
||||
ppAbs name a = text "abstract" <+> ppCId name <+> char '{' $$
|
||||
nest 2 (ppAll ppCat (cats a) $$
|
||||
ppAll ppFun (funs a)) $$
|
||||
char '}'
|
||||
|
||||
ppCat :: CId -> [Hypo] -> Doc
|
||||
ppCat c hyps = text "cat" <+> ppCId c <+> hsep (snd (mapAccumL ppHypo [] hyps))
|
||||
|
||||
ppFun :: CId -> (Type,Int,[Equation]) -> Doc
|
||||
ppFun f (t,_,eqs) = text "fun" <+> ppCId f <+> colon <+> ppType 0 [] t $$
|
||||
if null eqs
|
||||
then empty
|
||||
else text "def" <+> vcat [let (scope,ds) = mapAccumL (ppPatt 9) [] patts
|
||||
in ppCId f <+> hsep ds <+> char '=' <+> ppExpr 0 scope res | Equ patts res <- eqs]
|
||||
|
||||
ppCnc :: Language -> Concr -> Doc
|
||||
ppCnc name cnc =
|
||||
text "concrete" <+> ppCId name <+> char '{' $$
|
||||
nest 2 (text "productions" $$
|
||||
nest 2 (vcat [ppProduction (fcat,prod) | (fcat,set) <- IntMap.toList (productions cnc), prod <- Set.toList set]) $$
|
||||
text "functions" $$
|
||||
nest 2 (vcat (map ppFFun (assocs (functions cnc)))) $$
|
||||
text "sequences" $$
|
||||
nest 2 (vcat (map ppSeq (assocs (sequences cnc)))) $$
|
||||
text "startcats" $$
|
||||
nest 2 (vcat (map ppStartCat (Map.toList (startCats cnc))))) $$
|
||||
char '}'
|
||||
|
||||
ppProduction (fcat,FApply funid args) =
|
||||
ppFCat fcat <+> text "->" <+> ppFunId funid <> brackets (hcat (punctuate comma (map ppFCat args)))
|
||||
ppProduction (fcat,FCoerce arg) =
|
||||
ppFCat fcat <+> text "->" <+> char '_' <> brackets (ppFCat arg)
|
||||
ppProduction (fcat,FConst _ ss) =
|
||||
ppFCat fcat <+> text "->" <+> ppStrs ss
|
||||
|
||||
ppFFun (funid,FFun fun arr) =
|
||||
ppFunId funid <+> text ":=" <+> parens (hcat (punctuate comma (map ppSeqId (elems arr)))) <+> brackets (ppCId fun)
|
||||
|
||||
ppSeq (seqid,seq) =
|
||||
ppSeqId seqid <+> text ":=" <+> hsep (map ppSymbol (elems seq))
|
||||
|
||||
ppStartCat (id,(start,end,labels)) =
|
||||
ppCId id <+> text ":=" <+> (text "range " <+> brackets (ppFCat start <+> text ".." <+> ppFCat end) $$
|
||||
text "labels" <+> brackets (vcat (map (text . show) (elems labels))))
|
||||
|
||||
ppSymbol (FSymCat d r) = char '<' <> int d <> comma <> int r <> char '>'
|
||||
ppSymbol (FSymLit d r) = char '<' <> int d <> comma <> int r <> char '>'
|
||||
ppSymbol (FSymKS ts) = ppStrs ts
|
||||
ppSymbol (FSymKP ts alts) = text "pre" <+> braces (hsep (punctuate semi (ppStrs ts : map ppAlt alts)))
|
||||
|
||||
ppAlt (Alt ts ps) = ppStrs ts <+> char '/' <+> hsep (map (doubleQuotes . text) ps)
|
||||
|
||||
ppStrs ss = doubleQuotes (hsep (map text ss))
|
||||
|
||||
ppFCat fcat
|
||||
| fcat == fcatString = text "CString"
|
||||
| fcat == fcatInt = text "CInt"
|
||||
| fcat == fcatFloat = text "CFloat"
|
||||
| fcat == fcatVar = text "CVar"
|
||||
| otherwise = char 'C' <> int fcat
|
||||
|
||||
ppFunId funid = char 'F' <> int funid
|
||||
ppSeqId seqid = char 'S' <> int seqid
|
||||
|
||||
-- Utilities
|
||||
|
||||
ppAll :: (a -> b -> Doc) -> Map.Map a b -> Doc
|
||||
ppAll p m = vcat [ p k v | (k,v) <- Map.toList m]
|
||||
Reference in New Issue
Block a user