PGF is now real synchronous PMCFG

This commit is contained in:
krasimir
2010-01-17 21:35:36 +00:00
parent af13bae2df
commit 9b362ff231
23 changed files with 296 additions and 599 deletions

View File

@@ -54,7 +54,7 @@ module PGF(
showPrintName,
-- ** Parsing
parse, parseWithRecovery, canParse, parseAllLang, parseAll,
parse, parseWithRecovery, parseAllLang, parseAll,
-- ** Evaluation
PGF.compute, paraphrase,
@@ -106,9 +106,7 @@ import PGF.Morphology
import PGF.Data hiding (functions)
import PGF.Binary
import qualified PGF.Parse as Parse
import qualified GF.Compile.GeneratePMCFG as PMCFG
import GF.Infra.Option
import GF.Data.Utilities (replace)
import Data.Char
@@ -144,9 +142,6 @@ parse :: PGF -> Language -> Type -> String -> [Tree]
parseWithRecovery :: PGF -> Language -> Type -> [Type] -> String -> [Tree]
-- | Checks whether the given language can be used for parsing.
canParse :: PGF -> Language -> Bool
-- | The same as 'linearizeAllLang' but does not return
-- the language.
linearizeAll :: PGF -> Tree -> [String]
@@ -228,31 +223,17 @@ complete :: PGF -> Language -> Type -> String
-- Implementation
---------------------------------------------------
readPGF f = decodeFile f >>= addParsers
-- Adds parsers for all concretes that don't have a parser and that have parser=ondemand.
addParsers :: PGF -> IO PGF
addParsers pgf = do cncs <- sequence [if wantsParser cnc then addParser lang cnc else return (lang,cnc)
| (lang,cnc) <- Map.toList (concretes pgf)]
return pgf { concretes = Map.fromList cncs }
where
wantsParser cnc = isNothing (parser cnc) && Map.lookup (mkCId "parser") (cflags cnc) == Just "ondemand"
addParser lang cnc = do pinfo <- PMCFG.convertConcrete noOptions (abstract pgf) lang cnc
return (lang,cnc { parser = Just pinfo })
readPGF f = decodeFile f
linearize pgf lang = concat . take 1 . PGF.Linearize.linearizes pgf lang
parse pgf lang typ s =
case Map.lookup lang (concretes pgf) of
Just cnc -> case parser cnc of
Just pinfo -> Parse.parse pgf lang typ (words s)
Nothing -> error ("No parser built for language: " ++ showCId lang)
Just cnc -> Parse.parse pgf lang typ (words s)
Nothing -> error ("Unknown language: " ++ showCId lang)
parseWithRecovery pgf lang typ open_typs s = Parse.parseWithRecovery pgf lang typ open_typs (words s)
canParse pgf cnc = isJust (lookParser pgf cnc)
linearizeAll mgr = map snd . linearizeAllLang mgr
linearizeAllLang mgr t =
[(lang,PGF.linearize mgr lang t) | lang <- languages mgr]
@@ -260,7 +241,7 @@ linearizeAllLang mgr t =
parseAll mgr typ = map snd . parseAllLang mgr typ
parseAllLang mgr typ s =
[(lang,ts) | lang <- languages mgr, canParse mgr lang, let ts = parse mgr lang typ s, not (null ts)]
[(lang,ts) | lang <- languages mgr, let ts = parse mgr lang typ s, not (null ts)]
generateRandom pgf cat = do
gen <- newStdGen

View File

@@ -51,24 +51,24 @@ instance Binary Abstr where
})
instance Binary Concr where
put cnc = put ( cflags cnc, lins cnc, opers cnc
, lincats cnc, lindefs cnc
, printnames cnc, paramlincats cnc
, parser cnc
put cnc = put ( cflags cnc, printnames cnc
, functions cnc, sequences cnc
, productions cnc
, totalCats cnc, startCats cnc
)
get = do cflags <- get
lins <- get
opers <- get
lincats <- get
lindefs <- get
printnames <- get
paramlincats <- get
parser <- get
return (Concr{ cflags=cflags, lins=lins, opers=opers
, lincats=lincats, lindefs=lindefs
, printnames=printnames
, paramlincats=paramlincats
, parser=parser
get = do cflags <- get
printnames <- get
functions <- get
sequences <- get
productions <- get
totalCats <- get
startCats <- get
return (Concr{ cflags=cflags, printnames=printnames
, functions=functions,sequences=sequences
, productions = productions
, pproductions = IntMap.empty
, lproductions = Map.empty
, totalCats=totalCats,startCats=startCats
})
instance Binary Alternative where
@@ -186,17 +186,4 @@ instance Binary Production where
1 -> liftM FCoerce get
_ -> decodingError
instance Binary ParserInfo where
put p = put (functions p, sequences p, productions p, totalCats p, startCats p)
get = do functions <- get
sequences <- get
productions <- get
totalCats <- get
startCats <- get
return (ParserInfo{functions=functions,sequences=sequences
,productions = productions
,pproductions = IntMap.empty
,lproductions = Map.empty
,totalCats=totalCats,startCats=startCats})
decodingError = fail "This PGF file was compiled with different version of GF"

View File

@@ -1,4 +1,4 @@
module PGF.Check (checkPGF) where
module PGF.Check (checkPGF,checkLin) where
import PGF.CId
import PGF.Data
@@ -7,14 +7,15 @@ import GF.Data.ErrM
import qualified Data.Map as Map
import Control.Monad
import Data.Maybe(fromMaybe)
import Debug.Trace
checkPGF :: PGF -> Err (PGF,Bool)
checkPGF pgf = do
checkPGF pgf = return (pgf,True) {- do
(cs,bs) <- mapM (checkConcrete pgf)
(Map.assocs (concretes pgf)) >>= return . unzip
return (pgf {concretes = Map.fromAscList cs}, and bs)
-}
-- errors are non-fatal; replace with 'fail' to change this
msg s = trace s (return ())
@@ -27,7 +28,7 @@ labelBoolErr ms iob = do
(x,b) <- iob
if b then return (x,b) else (msg ms >> return (x,b))
{-
checkConcrete :: PGF -> (CId,Concr) -> Err ((CId,Concr),Bool)
checkConcrete pgf (lang,cnc) =
labelBoolErr ("happened in language " ++ showCId lang) $ do
@@ -35,8 +36,11 @@ checkConcrete pgf (lang,cnc) =
return ((lang,cnc{lins = Map.fromAscList rs}),and bs)
where
checkl = checkLin pgf lang
-}
checkLin :: PGF -> CId -> (CId,Term) -> Err ((CId,Term),Bool)
type PGFSig = (Map.Map CId (Type,Int,[Equation]),Map.Map CId Term,Map.Map CId Term)
checkLin :: PGFSig -> CId -> (CId,Term) -> Err ((CId,Term),Bool)
checkLin pgf lang (f,t) =
labelBoolErr ("happened in function " ++ showCId f) $ do
(t',b) <- checkTerm (lintype pgf lang f) t --- $ inline pgf lang t
@@ -124,8 +128,8 @@ ints = C
str :: CType
str = S []
lintype :: PGF -> CId -> CId -> LinType
lintype pgf lang fun = case typeSkeleton (lookType pgf fun) of
lintype :: PGFSig -> CId -> CId -> LinType
lintype pgf lang fun = case typeSkeleton (lookFun pgf fun) of
(cs,c) -> (map vlinc cs, linc c) ---- HOAS
where
linc = lookLincat pgf lang
@@ -133,7 +137,7 @@ lintype pgf lang fun = case typeSkeleton (lookType pgf fun) of
vlinc (i,c) = case linc c of
R ts -> R (ts ++ replicate i str)
inline :: PGF -> CId -> Term -> Term
inline :: PGFSig -> CId -> Term -> Term
inline pgf lang t = case t of
F c -> inl $ look c
_ -> composSafeOp inl t
@@ -171,3 +175,7 @@ err :: (String -> b) -> (a -> b) -> Err a -> b
err d f e = case e of
Ok a -> f a
Bad s -> d s
lookFun (abs,lin,lincats) f = (\(a,b,c) -> a) $ fromMaybe (error "No abs") (Map.lookup f abs)
lookLincat (abs,lin,lincats) _ c = fromMaybe (error "No lincat") (Map.lookup c lincats)
lookLin (abs,lin,lincats) _ f = fromMaybe (error "No lin") (Map.lookup f lin)

View File

@@ -1,15 +1,17 @@
module PGF.Data (module PGF.Data, module PGF.Expr, module PGF.Type, module PGF.PMCFG) where
module PGF.Data (module PGF.Data, module PGF.Expr, module PGF.Type) where
import PGF.CId
import PGF.Expr hiding (Value, Env, Tree)
import PGF.Type
import PGF.PMCFG
import qualified Data.Map as Map
import qualified Data.Set as Set
import qualified Data.IntMap as IntMap
import Data.Array.IArray
import Data.Array.Unboxed
import Data.List
-- internal datatypes for PGF
-- | An abstract data type representing multilingual grammar
@@ -30,16 +32,40 @@ data Abstr = Abstr {
}
data Concr = Concr {
cflags :: Map.Map CId String, -- value of a flag
lins :: Map.Map CId Term, -- lin of a fun
opers :: Map.Map CId Term, -- oper generated by subex elim
lincats :: Map.Map CId Term, -- lin type of a cat
lindefs :: Map.Map CId Term, -- lin default of a cat
printnames :: Map.Map CId String, -- printname of a cat or a fun
paramlincats :: Map.Map CId Term, -- lin type of cat, with printable param names
parser :: Maybe ParserInfo -- parser
cflags :: Map.Map CId String, -- value of a flag
printnames :: Map.Map CId String, -- printname of a cat or a fun
functions :: Array FunId FFun,
sequences :: Array SeqId FSeq,
productions :: IntMap.IntMap (Set.Set Production), -- the original productions loaded from the PGF file
pproductions :: IntMap.IntMap (Set.Set Production), -- productions needed for parsing
lproductions :: Map.Map CId (IntMap.IntMap (Set.Set Production)), -- productions needed for linearization
startCats :: Map.Map CId (FCat,FCat,Array FIndex String), -- for every category - start/end FCat and a list of label names
totalCats :: {-# UNPACK #-} !FCat
}
type FCat = Int
type FIndex = Int
type FPointPos = Int
data FSymbol
= FSymCat {-# UNPACK #-} !Int {-# UNPACK #-} !FIndex
| FSymLit {-# UNPACK #-} !Int {-# UNPACK #-} !FIndex
| FSymKS [String]
| FSymKP [String] [Alternative]
deriving (Eq,Ord,Show)
data Production
= FApply {-# UNPACK #-} !FunId [FCat]
| FCoerce {-# UNPACK #-} !FCat
| FConst Expr [String]
deriving (Eq,Ord,Show)
data FFun = FFun CId {-# UNPACK #-} !(UArray FIndex SeqId) deriving (Eq,Ord,Show)
type FSeq = Array FPointPos FSymbol
type FunId = Int
type SeqId = Int
data Alternative =
Alt [String] [String]
deriving (Eq,Ord,Show)
data Term =
R [Term]
| P Term Term
@@ -59,7 +85,7 @@ data Tokn =
deriving (Eq,Ord,Show)
-- merge two GFCCs; fails is differens absnames; priority to second arg
-- merge two PGFs; fails is differens absnames; priority to second arg
unionPGF :: PGF -> PGF -> PGF
unionPGF one two = case absname one of
@@ -93,3 +119,12 @@ readLanguage = readCId
showLanguage :: Language -> String
showLanguage = showCId
fcatString, fcatInt, fcatFloat, fcatVar :: Int
fcatString = (-1)
fcatInt = (-2)
fcatFloat = (-3)
fcatVar = (-4)
isLiteralFCat :: FCat -> Bool
isLiteralFCat = (`elem` [fcatString, fcatInt, fcatFloat, fcatVar])

View File

@@ -3,7 +3,6 @@ module PGF.Linearize(linearizes,markLinearizes,tabularLinearizes) where
import PGF.CId
import PGF.Data
import PGF.Macros
import Data.Maybe (fromJust)
import Data.Array.IArray
import Data.List
import Control.Monad
@@ -22,8 +21,7 @@ linTree :: PGF -> Language -> (Maybe CId -> [Int] -> LinTable -> LinTable) -> Ex
linTree pgf lang mark e = lin0 [] [] [] Nothing e
where
cnc = lookMap (error "no lang") lang (concretes pgf)
pinfo = fromJust (parser cnc)
lp = lproductions pinfo
lp = lproductions cnc
lin0 path xs ys mb_fid (EAbs _ x e) = lin0 path (showCId x:xs) ys mb_fid e
lin0 path xs ys mb_fid (ETyped e _) = lin0 path xs ys mb_fid e
@@ -50,7 +48,7 @@ linTree pgf lang mark e = lin0 [] [] [] Nothing e
case prod of
FApply funid fids -> do guard (length fids == length es)
args <- sequence (zipWith3 (\i fid e -> lin0 (sub i path) [] xs (Just fid) e) [0..] fids es)
let (FFun _ lins) = functions pinfo ! funid
let (FFun _ lins) = functions cnc ! funid
return (listArray (bounds lins) [computeSeq seqid args | seqid <- elems lins])
FCoerce fid -> apply path xs (Just fid) f es
Nothing -> mzero
@@ -70,7 +68,7 @@ linTree pgf lang mark e = lin0 [] [] [] Nothing e
computeSeq seqid args = concatMap compute (elems seq)
where
seq = sequences pinfo ! seqid
seq = sequences cnc ! seqid
compute (FSymCat d r) = (args !! d) ! r
compute (FSymLit d r) = (args !! d) ! r
@@ -94,7 +92,7 @@ tabularLinearizes pgf lang e = map (zip lbls . map (unwords . untokn) . elems) (
where
lbls = case unApp e of
Just (f,_) -> let cat = valCat (lookType pgf f)
in case parser (lookConcr pgf lang) >>= Map.lookup cat . startCats of
in case Map.lookup cat (startCats (lookConcr pgf lang)) of
Just (_,_,lbls) -> elems lbls
Nothing -> error "No labels"
Nothing -> error "Not function application"

View File

@@ -17,22 +17,6 @@ import GF.Data.Utilities(sortNub)
mapConcretes :: (Concr -> Concr) -> PGF -> PGF
mapConcretes f pgf = pgf { concretes = Map.map f (concretes pgf) }
lookLin :: PGF -> CId -> CId -> Term
lookLin pgf lang fun =
lookMap tm0 fun $ lins $ lookMap (error "no lang") lang $ concretes pgf
lookOper :: PGF -> CId -> CId -> Term
lookOper pgf lang fun =
lookMap tm0 fun $ opers $ lookMap (error "no lang") lang $ concretes pgf
lookLincat :: PGF -> CId -> CId -> Term
lookLincat pgf lang fun =
lookMap tm0 fun $ lincats $ lookMap (error "no lang") lang $ concretes pgf
lookParamLincat :: PGF -> CId -> CId -> Term
lookParamLincat pgf lang fun =
lookMap tm0 fun $ paramlincats $ lookMap (error "no lang") lang $ concretes pgf
lookType :: PGF -> CId -> Type
lookType pgf f =
case lookMap (error $ "lookType " ++ show f) f (funs (abstract pgf)) of
@@ -52,9 +36,6 @@ isData pgf f =
lookValCat :: PGF -> CId -> CId
lookValCat pgf = valCat . lookType pgf
lookParser :: PGF -> CId -> Maybe ParserInfo
lookParser pgf lang = Map.lookup lang (concretes pgf) >>= parser
lookStartCat :: PGF -> CId
lookStartCat pgf = mkCId $ fromMaybe "S" $ msum $ Data.List.map (Map.lookup (mkCId "startcat"))
[gflags pgf, aflags (abstract pgf)]
@@ -86,7 +67,7 @@ missingLins pgf lang = [c | c <- fs, not (hasl c)] where
hasl = hasLin pgf lang
hasLin :: PGF -> CId -> CId -> Bool
hasLin pgf lang f = Map.member f $ lins $ lookConcr pgf lang
hasLin pgf lang f = Map.member f $ lproductions $ lookConcr pgf lang
restrictPGF :: (CId -> Bool) -> PGF -> PGF
restrictPGF cond pgf = pgf {
@@ -164,13 +145,11 @@ updateProductionIndices :: PGF -> PGF
updateProductionIndices pgf = pgf{concretes = fmap updateConcrete (concretes pgf)}
where
updateConcrete cnc =
case parser cnc of
Nothing -> cnc
Just pinfo -> let prods0 = filterProductions (productions pinfo)
p_prods = parseIndex pinfo prods0
l_prods = linIndex pinfo prods0
in cnc{parser = Just pinfo{pproductions = p_prods, lproductions = l_prods}}
let prods0 = filterProductions (productions cnc)
p_prods = parseIndex cnc prods0
l_prods = linIndex cnc prods0
in cnc{pproductions = p_prods, lproductions = l_prods}
filterProductions prods0
| IntMap.size prods == IntMap.size prods0 = prods
| otherwise = filterProductions prods

View File

@@ -20,7 +20,7 @@ newtype Morpho = Morpho (Map.Map String [(Lemma,Analysis)])
buildMorpho :: PGF -> Language -> Morpho
buildMorpho pgf lang = Morpho $
case Map.lookup lang (concretes pgf) >>= parser of
case Map.lookup lang (concretes pgf) of
Just pinfo -> collectWords pinfo
Nothing -> Map.empty

View File

@@ -1,101 +0,0 @@
module PGF.PMCFG where
import PGF.CId
import PGF.Expr
import qualified Data.Map as Map
import qualified Data.Set as Set
import qualified Data.IntMap as IntMap
import Data.Array.IArray
import Data.Array.Unboxed
import Text.PrettyPrint
type FCat = Int
type FIndex = Int
type FPointPos = Int
data FSymbol
= FSymCat {-# UNPACK #-} !Int {-# UNPACK #-} !FIndex
| FSymLit {-# UNPACK #-} !Int {-# UNPACK #-} !FIndex
| FSymKS [String]
| FSymKP [String] [Alternative]
deriving (Eq,Ord,Show)
data Production
= FApply {-# UNPACK #-} !FunId [FCat]
| FCoerce {-# UNPACK #-} !FCat
| FConst Expr [String]
deriving (Eq,Ord,Show)
data FFun = FFun CId {-# UNPACK #-} !(UArray FIndex SeqId) deriving (Eq,Ord,Show)
type FSeq = Array FPointPos FSymbol
type FunId = Int
type SeqId = Int
data Alternative =
Alt [String] [String]
deriving (Eq,Ord,Show)
data ParserInfo
= ParserInfo { functions :: Array FunId FFun
, sequences :: Array SeqId FSeq
, productions :: IntMap.IntMap (Set.Set Production) -- the original productions loaded from the PGF file
, pproductions :: IntMap.IntMap (Set.Set Production) -- productions needed for parsing
, lproductions :: Map.Map CId (IntMap.IntMap (Set.Set Production)) -- productions needed for linearization
, startCats :: Map.Map CId (FCat,FCat,Array FIndex String) -- for every category - start/end FCat and a list of label names
, totalCats :: {-# UNPACK #-} !FCat
}
fcatString, fcatInt, fcatFloat, fcatVar :: Int
fcatString = (-1)
fcatInt = (-2)
fcatFloat = (-3)
fcatVar = (-4)
isLiteralFCat :: FCat -> Bool
isLiteralFCat = (`elem` [fcatString, fcatInt, fcatFloat, fcatVar])
ppPMCFG :: ParserInfo -> Doc
ppPMCFG pinfo =
text "productions" $$
nest 2 (vcat [ppProduction (fcat,prod) | (fcat,set) <- IntMap.toList (productions pinfo), prod <- Set.toList set]) $$
text "functions" $$
nest 2 (vcat (map ppFun (assocs (functions pinfo)))) $$
text "sequences" $$
nest 2 (vcat (map ppSeq (assocs (sequences pinfo)))) $$
text "startcats" $$
nest 2 (vcat (map ppStartCat (Map.toList (startCats pinfo))))
ppProduction (fcat,FApply funid args) =
ppFCat fcat <+> text "->" <+> ppFunId funid <> brackets (hcat (punctuate comma (map ppFCat args)))
ppProduction (fcat,FCoerce arg) =
ppFCat fcat <+> text "->" <+> char '_' <> brackets (ppFCat arg)
ppProduction (fcat,FConst _ ss) =
ppFCat fcat <+> text "->" <+> ppStrs ss
ppFun (funid,FFun fun arr) =
ppFunId funid <+> text ":=" <+> parens (hcat (punctuate comma (map ppSeqId (elems arr)))) <+> brackets (ppCId fun)
ppSeq (seqid,seq) =
ppSeqId seqid <+> text ":=" <+> hsep (map ppSymbol (elems seq))
ppStartCat (id,(start,end,labels)) =
ppCId id <+> text ":=" <+> (text "range " <+> brackets (ppFCat start <+> text ".." <+> ppFCat end) $$
text "labels" <+> brackets (vcat (map (text . show) (elems labels))))
ppSymbol (FSymCat d r) = char '<' <> int d <> comma <> int r <> char '>'
ppSymbol (FSymLit d r) = char '<' <> int d <> comma <> int r <> char '>'
ppSymbol (FSymKS ts) = ppStrs ts
ppSymbol (FSymKP ts alts) = text "pre" <+> braces (hsep (punctuate semi (ppStrs ts : map ppAlt alts)))
ppAlt (Alt ts ps) = ppStrs ts <+> char '/' <+> hsep (map (doubleQuotes . text) ps)
ppStrs ss = doubleQuotes (hsep (map text ss))
ppFCat fcat
| fcat == fcatString = text "CString"
| fcat == fcatInt = text "CInt"
| fcat == fcatFloat = text "CFloat"
| fcat == fcatVar = text "CVar"
| otherwise = char 'C' <> int fcat
ppFunId funid = char 'F' <> int funid
ppSeqId seqid = char 'S' <> int seqid

View File

@@ -56,23 +56,20 @@ parseWithRecovery pgf lang typ open_typs toks = accept (initState pgf lang typ)
-- startup category.
initState :: PGF -> Language -> Type -> ParseState
initState pgf lang (DTyp _ start _) =
let items = case Map.lookup start (startCats pinfo) of
let items = case Map.lookup start (startCats cnc) of
Just (s,e,labels) -> do cat <- range (s,e)
(funid,args) <- foldForest (\funid args -> (:) (funid,args)) (\_ _ args -> args)
[] cat (pproductions pinfo)
let FFun fn lins = functions pinfo ! funid
[] cat (pproductions cnc)
let FFun fn lins = functions cnc ! funid
(lbl,seqid) <- assocs lins
return (Active 0 0 funid seqid args (AK cat lbl))
Nothing -> mzero
pinfo =
case lookParser pgf lang of
Just pinfo -> pinfo
_ -> error ("Unknown language: " ++ showCId lang)
cnc = lookConcr pgf lang
in PState pgf
pinfo
(Chart emptyAC [] emptyPC (pproductions pinfo) (totalCats pinfo) 0)
cnc
(Chart emptyAC [] emptyPC (pproductions cnc) (totalCats cnc) 0)
(TMap.singleton [] (Set.fromList items))
-- | From the current state and the next token
@@ -81,19 +78,19 @@ initState pgf lang (DTyp _ start _) =
-- If the new token cannot be accepted then an error state
-- is returned.
nextState :: ParseState -> String -> Either ErrorState ParseState
nextState (PState pgf pinfo chart items) t =
nextState (PState pgf cnc chart items) t =
let (mb_agenda,map_items) = TMap.decompose items
agenda = maybe [] Set.toList mb_agenda
acc = fromMaybe TMap.empty (Map.lookup t map_items)
(acc1,chart1) = process (Just t) add (sequences pinfo) (functions pinfo) agenda acc chart
(acc1,chart1) = process (Just t) add (sequences cnc) (functions cnc) agenda acc chart
chart2 = chart1{ active =emptyAC
, actives=active chart1 : actives chart1
, passive=emptyPC
, offset =offset chart1+1
}
in if TMap.null acc1
then Left (EState pgf pinfo chart2)
else Right (PState pgf pinfo chart2 acc1)
then Left (EState pgf cnc chart2)
else Right (PState pgf cnc chart2 acc1)
where
add (tok:toks) item acc
| tok == t = TMap.insertWith Set.union toks (Set.singleton item) acc
@@ -104,35 +101,35 @@ nextState (PState pgf pinfo chart items) t =
-- next words and the consequent states. This is used for word completions in
-- the GF interpreter.
getCompletions :: ParseState -> String -> Map.Map String ParseState
getCompletions (PState pgf pinfo chart items) w =
getCompletions (PState pgf cnc chart items) w =
let (mb_agenda,map_items) = TMap.decompose items
agenda = maybe [] Set.toList mb_agenda
acc = Map.filterWithKey (\tok _ -> isPrefixOf w tok) map_items
(acc',chart1) = process Nothing add (sequences pinfo) (functions pinfo) agenda acc chart
(acc',chart1) = process Nothing add (sequences cnc) (functions cnc) agenda acc chart
chart2 = chart1{ active =emptyAC
, actives=active chart1 : actives chart1
, passive=emptyPC
, offset =offset chart1+1
}
in fmap (PState pgf pinfo chart2) acc'
in fmap (PState pgf cnc chart2) acc'
where
add (tok:toks) item acc
| isPrefixOf w tok = Map.insertWith (TMap.unionWith Set.union) tok (TMap.singleton toks (Set.singleton item)) acc
add _ item acc = acc
recoveryStates :: [Type] -> ErrorState -> (ParseState, Map.Map String ParseState)
recoveryStates open_types (EState pgf pinfo chart) =
recoveryStates open_types (EState pgf cnc chart) =
let open_fcats = concatMap type2fcats open_types
agenda = foldl (complete open_fcats) [] (actives chart)
(acc,chart1) = process Nothing add (sequences pinfo) (functions pinfo) agenda Map.empty chart
(acc,chart1) = process Nothing add (sequences cnc) (functions cnc) agenda Map.empty chart
chart2 = chart1{ active =emptyAC
, actives=active chart1 : actives chart1
, passive=emptyPC
, offset =offset chart1+1
}
in (PState pgf pinfo chart (TMap.singleton [] (Set.fromList agenda)), fmap (PState pgf pinfo chart2) acc)
in (PState pgf cnc chart (TMap.singleton [] (Set.fromList agenda)), fmap (PState pgf cnc chart2) acc)
where
type2fcats (DTyp _ cat _) = case Map.lookup cat (startCats pinfo) of
type2fcats (DTyp _ cat _) = case Map.lookup cat (startCats cnc) of
Just (s,e,labels) -> range (s,e)
Nothing -> []
@@ -149,15 +146,15 @@ recoveryStates open_types (EState pgf pinfo chart) =
-- limited by the category specified, which is usually
-- the same as the startup category.
extractTrees :: ParseState -> Type -> [Tree]
extractTrees (PState pgf pinfo chart items) ty@(DTyp _ start _) =
extractTrees (PState pgf cnc chart items) ty@(DTyp _ start _) =
nubsort [e1 | e <- exps, Right e1 <- [checkExpr pgf e ty]]
where
(mb_agenda,acc) = TMap.decompose items
agenda = maybe [] Set.toList mb_agenda
(_,st) = process Nothing (\_ _ -> id) (sequences pinfo) (functions pinfo) agenda () chart
(_,st) = process Nothing (\_ _ -> id) (sequences cnc) (functions cnc) agenda () chart
exps =
case Map.lookup start (startCats pinfo) of
case Map.lookup start (startCats cnc) of
Just (s,e,lbls) -> do cat <- range (s,e)
lbl <- indices lbls
Just fid <- [lookupPC (PK cat lbl 0) (passive st)]
@@ -167,10 +164,10 @@ extractTrees (PState pgf pinfo chart items) ty@(DTyp _ start _) =
Nothing -> mzero
go rec fcat' (d,fcat)
| fcat < totalCats pinfo = return (Set.empty,EMeta (fcat'*10+d)) -- FIXME: here we assume that every rule has at most 10 arguments
| fcat < totalCats cnc = return (Set.empty,EMeta (fcat'*10+d)) -- FIXME: here we assume that every rule has at most 10 arguments
| Set.member fcat rec = mzero
| otherwise = foldForest (\funid args trees ->
do let FFun fn lins = functions pinfo ! funid
do let FFun fn lins = functions cnc ! funid
args <- mapM (go (Set.insert fcat rec) fcat) (zip [0..] args)
check_ho_fun fn args
`mplus`
@@ -348,7 +345,7 @@ foldForest f g b fcat forest =
-- | An abstract data type whose values represent
-- the current state in an incremental parser.
data ParseState = PState PGF ParserInfo Chart (TMap.TrieMap String (Set.Set Active))
data ParseState = PState PGF Concr Chart (TMap.TrieMap String (Set.Set Active))
data Chart
= Chart
@@ -367,4 +364,4 @@ data Chart
-- | An abstract data type whose values represent
-- the state in an incremental parser after an error.
data ErrorState = EState PGF ParserInfo Chart
data ErrorState = EState PGF Concr Chart

View File

@@ -0,0 +1,89 @@
module PGF.Printer (ppPGF,ppCat,ppFun) where
import PGF.CId
import PGF.Data
import PGF.Macros
import GF.Data.Operations
import qualified Data.Map as Map
import qualified Data.Set as Set
import qualified Data.IntMap as IntMap
import Data.List
import Data.Array.IArray
import Data.Array.Unboxed
import Text.PrettyPrint
ppPGF :: PGF -> Doc
ppPGF pgf = ppAbs (absname pgf) (abstract pgf) $$ ppAll ppCnc (concretes pgf)
ppAbs :: Language -> Abstr -> Doc
ppAbs name a = text "abstract" <+> ppCId name <+> char '{' $$
nest 2 (ppAll ppCat (cats a) $$
ppAll ppFun (funs a)) $$
char '}'
ppCat :: CId -> [Hypo] -> Doc
ppCat c hyps = text "cat" <+> ppCId c <+> hsep (snd (mapAccumL ppHypo [] hyps))
ppFun :: CId -> (Type,Int,[Equation]) -> Doc
ppFun f (t,_,eqs) = text "fun" <+> ppCId f <+> colon <+> ppType 0 [] t $$
if null eqs
then empty
else text "def" <+> vcat [let (scope,ds) = mapAccumL (ppPatt 9) [] patts
in ppCId f <+> hsep ds <+> char '=' <+> ppExpr 0 scope res | Equ patts res <- eqs]
ppCnc :: Language -> Concr -> Doc
ppCnc name cnc =
text "concrete" <+> ppCId name <+> char '{' $$
nest 2 (text "productions" $$
nest 2 (vcat [ppProduction (fcat,prod) | (fcat,set) <- IntMap.toList (productions cnc), prod <- Set.toList set]) $$
text "functions" $$
nest 2 (vcat (map ppFFun (assocs (functions cnc)))) $$
text "sequences" $$
nest 2 (vcat (map ppSeq (assocs (sequences cnc)))) $$
text "startcats" $$
nest 2 (vcat (map ppStartCat (Map.toList (startCats cnc))))) $$
char '}'
ppProduction (fcat,FApply funid args) =
ppFCat fcat <+> text "->" <+> ppFunId funid <> brackets (hcat (punctuate comma (map ppFCat args)))
ppProduction (fcat,FCoerce arg) =
ppFCat fcat <+> text "->" <+> char '_' <> brackets (ppFCat arg)
ppProduction (fcat,FConst _ ss) =
ppFCat fcat <+> text "->" <+> ppStrs ss
ppFFun (funid,FFun fun arr) =
ppFunId funid <+> text ":=" <+> parens (hcat (punctuate comma (map ppSeqId (elems arr)))) <+> brackets (ppCId fun)
ppSeq (seqid,seq) =
ppSeqId seqid <+> text ":=" <+> hsep (map ppSymbol (elems seq))
ppStartCat (id,(start,end,labels)) =
ppCId id <+> text ":=" <+> (text "range " <+> brackets (ppFCat start <+> text ".." <+> ppFCat end) $$
text "labels" <+> brackets (vcat (map (text . show) (elems labels))))
ppSymbol (FSymCat d r) = char '<' <> int d <> comma <> int r <> char '>'
ppSymbol (FSymLit d r) = char '<' <> int d <> comma <> int r <> char '>'
ppSymbol (FSymKS ts) = ppStrs ts
ppSymbol (FSymKP ts alts) = text "pre" <+> braces (hsep (punctuate semi (ppStrs ts : map ppAlt alts)))
ppAlt (Alt ts ps) = ppStrs ts <+> char '/' <+> hsep (map (doubleQuotes . text) ps)
ppStrs ss = doubleQuotes (hsep (map text ss))
ppFCat fcat
| fcat == fcatString = text "CString"
| fcat == fcatInt = text "CInt"
| fcat == fcatFloat = text "CFloat"
| fcat == fcatVar = text "CVar"
| otherwise = char 'C' <> int fcat
ppFunId funid = char 'F' <> int funid
ppSeqId seqid = char 'S' <> int seqid
-- Utilities
ppAll :: (a -> b -> Doc) -> Map.Map a b -> Doc
ppAll p m = vcat [ p k v | (k,v) <- Map.toList m]