1
0
forked from GitHub/gf-core

Store FCFPInfo (all information needed for FCFG parsing) in GFCC files, and in the internal DataGFCC.GFCC structure. The parsing information format is still in flux.

This commit is contained in:
bringert
2008-01-03 17:10:05 +00:00
parent a12535c904
commit ad991ef299
11 changed files with 203 additions and 55 deletions

View File

@@ -25,7 +25,7 @@ source2gfcc opts gf =
let
(abs,gfcc) = mkCanon2gfcc opts (gfcabs gf) gf
gfcc1 = maybe undefined id $ checkGFCCmaybe gfcc
in if oElem (iOpt "noopt") opts then gfcc1 else optGFCC gfcc1
in addParsers $ if oElem (iOpt "noopt") opts then gfcc1 else optGFCC gfcc1
gfcabs gfc =
prt $ head $ M.allConcretes gfc $ maybe (error "no abstract") id $

View File

@@ -22,10 +22,8 @@ importGrammar mgr0 opts files = do
let name = justModuleName (last files)
let (abs,gfcc0) = mkCanon2gfcc opts name gr
gfcc1 <- checkGFCCio gfcc0
return $ if oElem (iOpt "noopt") opts then gfcc1 else optGFCC gfcc1
return $ addParsers $ if oElem (iOpt "noopt") opts then gfcc1 else optGFCC gfcc1
"gfcc" ->
mapM file2gfcc files >>= return . foldl1 unionGFCC
let gfcc3 = unionGFCC (gfcc mgr0) gfcc2
return $ MultiGrammar gfcc3
(nubBy (\ (x,_) (y,_) -> x == y) (gfcc2parsers gfcc3 ++ parsers mgr0))
-- later coming parsers override
return $ MultiGrammar gfcc3

View File

@@ -19,6 +19,7 @@ import GF.Canon.GFC
import GF.Canon.AbsGFC
import GF.GFCC.CId
--import GF.GFCC.DataGFCC(mkGFCC)
import GF.GFCC.Macros (lookFCFG)
import GF.Canon.CanonToGFCC
import GF.Grammar.Macros
import GF.Grammar.MMacros
@@ -263,9 +264,8 @@ updateShellState opts ign mcnc sh ((_,sgr,gr,eenv),rts) = do
let fromGFC = snd . snd . Cnv.convertGFC opts
(mcfgs, cfgs) = unzip $ map (curry fromGFC cgr) concrs
fcfgs0 = [(IC id,g) | (CId id,g) <-
FCnv.convertGrammar (canon2gfcc opts cgr)] ---- UTF8
fcfgs = [(c,g) | c <- concrs, Just g <- [lookup c fcfgs0]]
gfcc = canon2gfcc opts cgr ---- UTF8
fcfgs = [(c,g) | c@(IC cn) <- concrs, Just g <- [lookFCFG gfcc (CId cn)]]
pInfos = zipWith3 Prs.buildPInfo mcfgs (map snd fcfgs) cfgs
let funs = funRulesOf cgr

View File

@@ -13,7 +13,7 @@
module GF.Conversion.SimpleToFCFG
(convertGrammar) where
(convertConcrete) where
import GF.Infra.PrintClass
@@ -39,19 +39,14 @@ import Data.Maybe
----------------------------------------------------------------------
-- main conversion function
convertGrammar :: GFCC -> [(CId,FGrammar)]
convertGrammar gfcc =
[(cncname,convert abs_defs conc cats)
| cncname <- cncnames gfcc,
cnc <- Map.lookup cncname (concretes gfcc),
let conc = Map.union (opers cnc) (lins cnc) -- "union big+small most efficient"
convertConcrete :: Abstr -> Concr -> FGrammar
convertConcrete abs cnc = convert abs_defs conc cats
where abs_defs = Map.assocs (funs abs)
conc = Map.union (opers cnc) (lins cnc) -- "union big+small most efficient"
cats = lincats cnc
]
where
abs_defs = Map.assocs (funs (abstract gfcc))
convert :: [(CId,(Type,Exp))] -> TermMap -> TermMap -> FGrammar
convert abs_defs cnc_defs cat_defs = getFGrammar (loop frulesEnv)
convert :: [(CId,(Type,Exp))] -> TermMap -> TermMap -> FGrammar
convert abs_defs cnc_defs cat_defs = getFGrammar (loop frulesEnv)
where
srules = [
(XRule id args res (map findLinType args) (findLinType res) term) |

View File

@@ -24,7 +24,7 @@ mainGFC xx = do
let name = justModuleName (last fs)
let (abs,gc0) = mkCanon2gfcc opts name gr
gc1 <- checkGFCCio gc0
let gc = if oElem (iOpt "noopt") opts then gc1 else optGFCC gc1
let gc = addParsers $ if oElem (iOpt "noopt") opts then gc1 else optGFCC gc1
let target = targetName opts abs
let gfccFile = target ++ ".gfcc"
writeFile gfccFile (printGFCC gc)

View File

@@ -1,4 +1,4 @@
module GF.Devel.GrammarToGFCC (prGrammar2gfcc,mkCanon2gfcc) where
module GF.Devel.GrammarToGFCC (prGrammar2gfcc,mkCanon2gfcc,addParsers) where
import GF.Devel.OptimizeGF (unshareModule)
@@ -15,6 +15,8 @@ import qualified GF.Grammar.Macros as GM
import qualified GF.Infra.Modules as M
import qualified GF.Infra.Option as O
import GF.Conversion.SimpleToFCFG (convertConcrete)
import GF.Parsing.FCFG.PInfo (buildFCFPInfo)
import GF.Devel.PrGrammar
import GF.Devel.PrintGFCC
import GF.Devel.ModDeps
@@ -41,6 +43,12 @@ mkCanon2gfcc opts cnc gr =
abs = err error id $ M.abstractOfConcrete gr (identC cnc)
pars = mkParamLincat gr
-- Adds parsers for all concretes
addParsers :: D.GFCC -> D.GFCC
addParsers gfcc = gfcc { D.concretes = Map.map conv (D.concretes gfcc) }
where
conv cnc = cnc { D.parser = Just (buildFCFPInfo (convertConcrete (D.abstract gfcc) cnc)) }
-- Generate GFCC from GFCM.
-- this assumes a grammar translated by canon2canon
@@ -72,7 +80,7 @@ canon2gfcc opts pars cgr@(M.MGrammar ((a,M.ModMod abm):cms)) =
cncs = Map.fromList [mkConcr lang (i2i lang) mo | (lang,M.ModMod mo) <- cms]
mkConcr lang0 lang mo =
(lang,D.Concr flags lins opers lincats lindefs printnames params)
(lang,D.Concr flags lins opers lincats lindefs printnames params fcfg)
where
js = tree2list (M.jments mo)
flags = Map.fromList [(CId f,x) | Opt (f,[x]) <- M.flags mo]
@@ -90,6 +98,7 @@ canon2gfcc opts pars cgr@(M.MGrammar ((a,M.ModMod abm):cms)) =
(Map.fromAscList [(i2i f, mkTerm tr) | (f,CncCat _ _ (Yes tr)) <- js])
params = Map.fromAscList
[(i2i c, pars lang0 c) | (c,CncCat (Yes ty) _ _) <- js]
fcfg = Nothing
i2i :: Ident -> CId
i2i = CId . prIdent

View File

@@ -27,7 +27,6 @@ import GF.Command.PPrTree
import GF.Data.ErrM
import GF.Parsing.FCFG
import GF.Conversion.SimpleToFCFG (convertGrammar)
--import GF.Data.Operations
--import GF.Infra.UseIO
@@ -44,7 +43,7 @@ import System.Directory (doesFileExist)
-- Interface
---------------------------------------------------
data MultiGrammar = MultiGrammar {gfcc :: GFCC, parsers :: [(Language,FCFPInfo)]}
data MultiGrammar = MultiGrammar {gfcc :: GFCC}
type Language = String
type Category = String
type Tree = Exp
@@ -77,10 +76,7 @@ startCat :: MultiGrammar -> Category
file2grammar f = do
gfcc <- file2gfcc f
return (MultiGrammar gfcc (gfcc2parsers gfcc))
gfcc2parsers gfcc =
[(lang, buildFCFPInfo fcfg) | (CId lang,fcfg) <- convertGrammar gfcc]
return (MultiGrammar gfcc)
file2gfcc f = do
s <- readFileIf f
@@ -90,7 +86,7 @@ file2gfcc f = do
linearize mgr lang = GF.GFCC.Linearize.linearize (gfcc mgr) (CId lang)
parse mgr lang cat s =
case lookup lang (parsers mgr) of
case lookParser (gfcc mgr) (CId lang) of
Nothing -> error "no parser"
Just pinfo -> case parseFCF "bottomup" pinfo (CId cat) (words s) of
Ok x -> x
@@ -126,7 +122,7 @@ categories mgr = [c | CId c <- Map.keys (cats (abstract (gfcc mgr)))]
startCat mgr = "S" ----
emptyMultiGrammar = MultiGrammar emptyGFCC []
emptyMultiGrammar = MultiGrammar emptyGFCC
------------ for internal use only

View File

@@ -3,6 +3,8 @@ module GF.GFCC.DataGFCC where
import GF.GFCC.CId
import GF.Infra.CompactPrint
import GF.Text.UTF8
import GF.Formalism.FCFG
import GF.Parsing.FCFG.PInfo
import Data.Map
import Data.List
@@ -31,7 +33,8 @@ data Concr = Concr {
lincats :: Map CId Term, -- lin type of a cat
lindefs :: Map CId Term, -- lin default of a cat
printnames :: Map CId Term, -- printname of a cat or a fun
paramlincats :: Map CId Term -- lin type of cat, with printable param names
paramlincats :: Map CId Term, -- lin type of cat, with printable param names
parser :: Maybe FCFPInfo -- parser
}
data Type =
@@ -116,7 +119,6 @@ emptyGFCC = GFCC {
concretes = empty
}
-- default map and filter are for Map here
lmap = Prelude.map
lfilter = Prelude.filter

View File

@@ -2,6 +2,8 @@ module GF.GFCC.Macros where
import GF.GFCC.CId
import GF.GFCC.DataGFCC
import GF.Formalism.FCFG (FGrammar)
import GF.Parsing.FCFG.PInfo (FCFPInfo, fcfPInfoToFGrammar)
----import GF.GFCC.PrintGFCC
import Data.Map
import Data.List
@@ -28,6 +30,12 @@ lookType :: GFCC -> CId -> Type
lookType gfcc f =
fst $ lookMap (error $ "lookType " ++ show f) f (funs (abstract gfcc))
lookParser :: GFCC -> CId -> Maybe FCFPInfo
lookParser gfcc lang = parser $ lookMap (error "no lang") lang $ concretes gfcc
lookFCFG :: GFCC -> CId -> Maybe FGrammar
lookFCFG gfcc lang = fmap fcfPInfoToFGrammar $ lookParser gfcc lang
lookGlobalFlag :: GFCC -> CId -> String
lookGlobalFlag gfcc f =
lookMap "?" f (gflags gfcc)

View File

@@ -3,8 +3,15 @@ module GF.GFCC.Raw.ConvertGFCC (toGFCC,fromGFCC) where
import GF.GFCC.DataGFCC
import GF.GFCC.Raw.AbsGFCCRaw
import GF.Data.Assoc
import GF.Formalism.FCFG
import GF.Formalism.Utilities (NameProfile(..), Profile(..), SyntaxForest(..))
import GF.Parsing.FCFG.PInfo (FCFPInfo(..))
import qualified Data.Array as Array
import Data.Map
-- convert parsed grammar to internal GFCC
toGFCC :: Grammar -> GFCC
@@ -31,29 +38,88 @@ toGFCC (Grm [
catfuns = fromAscList
[(cat,[f | (f, (DTyp _ c _,_)) <- lfuns, c==cat]) | (cat,_) <- lcats]
in Abstr aflags funs cats catfuns,
concretes = fromAscList (lmap mkCnc ccs)
concretes = fromAscList [(lang, toConcr ts) | App lang ts <- ccs]
}
where
mkCnc (
App lang [
App (CId "flags") fls,
App (CId "lin") ls,
App (CId "oper") ops,
App (CId "lincat") lincs,
App (CId "lindef") linds,
App (CId "printname") prns,
App (CId "param") params
]) = (lang,
Concr {
cflags = fromAscList [(f,v) | App f [AStr v] <- fls],
lins = fromAscList [(f,toTerm v) | App f [v] <- ls],
opers = fromAscList [(f,toTerm v) | App f [v] <- ops],
lincats = fromAscList [(f,toTerm v) | App f [v] <- lincs],
lindefs = fromAscList [(f,toTerm v) | App f [v] <- linds],
printnames = fromAscList [(f,toTerm v) | App f [v] <- prns],
paramlincats = fromAscList [(f,toTerm v) | App f [v] <- params]
}
)
toConcr :: [RExp] -> Concr
toConcr = foldl add (Concr {
cflags = empty,
lins = empty,
opers = empty,
lincats = empty,
lindefs = empty,
printnames = empty,
paramlincats = empty,
parser = Nothing
})
where
add :: Concr -> RExp -> Concr
add cnc (App (CId "flags") ts) = cnc { cflags = fromAscList [(f,v) | App f [AStr v] <- ts] }
add cnc (App (CId "lin") ts) = cnc { lins = mkTermMap ts }
add cnc (App (CId "oper") ts) = cnc { opers = mkTermMap ts }
add cnc (App (CId "lincat") ts) = cnc { lincats = mkTermMap ts }
add cnc (App (CId "lindef") ts) = cnc { lindefs = mkTermMap ts }
add cnc (App (CId "printname") ts) = cnc { printnames = mkTermMap ts }
add cnc (App (CId "param") ts) = cnc { paramlincats = mkTermMap ts }
add cnc (App (CId "parser") ts) = cnc { parser = Just (toPInfo ts) }
toPInfo :: [RExp] -> FCFPInfo
toPInfo = foldl add (FCFPInfo {
allRules = error "FCFPInfo.allRules",
topdownRules = error "FCFPInfo.topdownRules",
epsilonRules = error "FCFPInfo.epsilonRules",
leftcornerCats = error "FCFPInfo.leftcornerCats",
leftcornerTokens = error "FCFPInfo.leftcornerTokens",
grammarCats = error "FCFPInfo.grammarCats",
grammarToks = error "FCFPInfo.grammarToks",
startupCats = error "FCFPInfo.startupCats"})
where
add :: FCFPInfo -> RExp -> FCFPInfo
add p (App (CId f) ts) =
case f of
"rules" -> p { allRules = mkArray (lmap toFRule ts) }
"topdownrules" -> p { topdownRules = toAssoc expToInt (lmap expToInt) ts }
"epsilonrules" -> p { epsilonRules = lmap expToInt ts }
"lccats" -> p { leftcornerCats = toAssoc expToInt (lmap expToInt) ts }
"lctoks" -> p { leftcornerTokens = toAssoc expToStr (lmap expToInt) ts }
"cats" -> p { grammarCats = lmap expToInt ts }
"toks" -> p { grammarToks = lmap expToStr ts }
"startupcats" -> p { startupCats = fromList [(c, lmap expToInt cs) | App c cs <- ts] }
toFRule :: RExp -> FRule
toFRule (App (CId "rule")
[n,
App (CId "cats") (rt:at),
App (CId "R") ls]) = FRule name args res lins
where
name = toFName n
args = lmap expToInt at
res = expToInt rt
lins = mkArray [mkArray [toSymbol s | s <- l] | App (CId "S") l <- ls]
toFName :: RExp -> FName
toFName (App (CId "_A") [x]) = Name (CId "_") [Unify [expToInt x]]
toFName (App f ts) = Name f (lmap toProfile ts)
where
toProfile :: RExp -> Profile (SyntaxForest CId)
toProfile AMet = Unify []
toProfile (App (CId "_A") [t]) = Unify [expToInt t]
toProfile (App (CId "_U") ts) = Unify [expToInt t | App (CId "_A") [t] <- ts]
toProfile t = Constant (toSyntaxForest t)
toSyntaxForest :: RExp -> SyntaxForest CId
toSyntaxForest AMet = FMeta
toSyntaxForest (App n ts) = FNode n [lmap toSyntaxForest ts]
toSyntaxForest (AStr s) = FString s
toSyntaxForest (AInt i) = FInt i
toSyntaxForest (AFlt f) = FFloat f
toSymbol :: RExp -> FSymbol
toSymbol (App (CId "proj") [c,n,l]) = FSymCat (expToInt c) (expToInt l) (expToInt n)
toSymbol (AStr t) = FSymTok t
toAssoc :: Ord a => (RExp -> a) -> ([RExp] -> b) -> [RExp] -> Assoc a b
toAssoc f g xs = listAssoc [(f k, g v) | App (CId "map") (k:v) <- xs]
toType :: RExp -> Type
toType e = case e of
@@ -120,7 +186,7 @@ fromGFCC gfcc0 = Grm [
app "lindef" [App f [fromTerm v] | (f,v) <- toList (lindefs cnc)],
app "printname" [App f [fromTerm v] | (f,v) <- toList (printnames cnc)],
app "param" [App f [fromTerm v] | (f,v) <- toList (paramlincats cnc)]
]
] ++ maybe [] (\p -> [fromPInfo p]) (parser cnc)
fromType :: Type -> RExp
fromType e = case e of
@@ -163,3 +229,74 @@ fromTerm e = case e of
where
app = App . CId
str v = app "S" (lmap AStr v)
-- ** Parsing info
fromPInfo :: FCFPInfo -> RExp
fromPInfo p = app "parser" [
app "rules" [fromFRule rule | rule <- Array.elems (allRules p)],
app "topdownrules" (fromAssoc intToExp (lmap intToExp) (topdownRules p)),
app "epsilonrules" (lmap intToExp (epsilonRules p)),
app "lccats" (fromAssoc intToExp (lmap intToExp) (leftcornerCats p)),
app "lctoks" (fromAssoc AStr (lmap intToExp) (leftcornerTokens p)),
app "cats" (lmap intToExp (grammarCats p)),
app "toks" (lmap AStr (grammarToks p)),
app "startupcats" [App f (lmap intToExp cs) | (f,cs) <- toList (startupCats p)]
]
fromAssoc :: Ord a => (a -> RExp) -> (b -> [RExp]) -> Assoc a b -> [RExp]
fromAssoc f g xs = [app "map" (f x:g y) | (x,y) <- aAssocs xs]
fromFRule :: FRule -> RExp
fromFRule (FRule n args res lins) =
app "rule" [fromFName n,
app "cats" (intToExp res:lmap intToExp args),
app "R" [app "S" [fromSymbol s | s <- Array.elems l] | l <- Array.elems lins]
]
fromFName :: FName -> RExp
fromFName n = case n of
Name (CId "_") [p] -> fromProfile p
Name f ps -> App f (lmap fromProfile ps)
where
fromProfile :: Profile (SyntaxForest CId) -> RExp
fromProfile (Unify []) = AMet
fromProfile (Unify [x]) = daughter x
fromProfile (Unify args) = app "_U" (lmap daughter args)
fromProfile (Constant forest) = fromSyntaxForest forest
daughter n = app "_A" [intToExp n]
fromSyntaxForest :: SyntaxForest CId -> RExp
fromSyntaxForest FMeta = AMet
-- FIXME: is there always just one element here?
fromSyntaxForest (FNode n [args]) = App n (lmap fromSyntaxForest args)
fromSyntaxForest (FString s) = AStr s
fromSyntaxForest (FInt i) = AInt i
fromSyntaxForest (FFloat f) = AFlt f
fromSymbol :: FSymbol -> RExp
fromSymbol (FSymCat c l n) = app "proj" [intToExp c, intToExp n, intToExp l]
fromSymbol (FSymTok t) = AStr t
-- ** Utilities
mkTermMap :: [RExp] -> Map CId Term
mkTermMap ts = fromAscList [(f,toTerm v) | App f [v] <- ts]
app :: String -> [RExp] -> RExp
app = App . CId
mkArray :: [a] -> Array.Array Int a
mkArray xs = Array.listArray (0, length xs - 1) xs
expToInt :: Integral a => RExp -> a
expToInt (App (CId "neg") [AInt i]) = fromIntegral (negate i)
expToInt (AInt i) = fromIntegral i
expToStr :: RExp -> String
expToStr (AStr s) = s
intToExp :: Integral a => a -> RExp
intToExp x | x < 0 = App (CId "neg") [AInt (fromIntegral (negate x))]
| otherwise = AInt (fromIntegral x)

View File

@@ -99,6 +99,9 @@ buildFCFPInfo (grammar,startup) = -- trace (unlines [prt (x,Set.toList set) | (x
grammarcats = aElems topdownrules
grammartoks = nubsort [t | (FRule _ _ _ lins) <- grammar, lin <- elems lins, FSymTok t <- elems lin]
fcfPInfoToFGrammar :: FCFPInfo -> FGrammar
fcfPInfoToFGrammar pinfo = (elems (allRules pinfo), startupCats pinfo)
----------------------------------------------------------------------
-- pretty-printing of statistics