1
0
forked from GitHub/gf-core

Store FCFPInfo (all information needed for FCFG parsing) in GFCC files, and in the internal DataGFCC.GFCC structure. The parsing information format is still in flux.

This commit is contained in:
bringert
2008-01-03 17:10:05 +00:00
parent a12535c904
commit ad991ef299
11 changed files with 203 additions and 55 deletions

View File

@@ -27,7 +27,6 @@ import GF.Command.PPrTree
import GF.Data.ErrM
import GF.Parsing.FCFG
import GF.Conversion.SimpleToFCFG (convertGrammar)
--import GF.Data.Operations
--import GF.Infra.UseIO
@@ -44,7 +43,7 @@ import System.Directory (doesFileExist)
-- Interface
---------------------------------------------------
data MultiGrammar = MultiGrammar {gfcc :: GFCC, parsers :: [(Language,FCFPInfo)]}
data MultiGrammar = MultiGrammar {gfcc :: GFCC}
type Language = String
type Category = String
type Tree = Exp
@@ -77,10 +76,7 @@ startCat :: MultiGrammar -> Category
file2grammar f = do
gfcc <- file2gfcc f
return (MultiGrammar gfcc (gfcc2parsers gfcc))
gfcc2parsers gfcc =
[(lang, buildFCFPInfo fcfg) | (CId lang,fcfg) <- convertGrammar gfcc]
return (MultiGrammar gfcc)
file2gfcc f = do
s <- readFileIf f
@@ -90,7 +86,7 @@ file2gfcc f = do
linearize mgr lang = GF.GFCC.Linearize.linearize (gfcc mgr) (CId lang)
parse mgr lang cat s =
case lookup lang (parsers mgr) of
case lookParser (gfcc mgr) (CId lang) of
Nothing -> error "no parser"
Just pinfo -> case parseFCF "bottomup" pinfo (CId cat) (words s) of
Ok x -> x
@@ -126,7 +122,7 @@ categories mgr = [c | CId c <- Map.keys (cats (abstract (gfcc mgr)))]
startCat mgr = "S" ----
emptyMultiGrammar = MultiGrammar emptyGFCC []
emptyMultiGrammar = MultiGrammar emptyGFCC
------------ for internal use only

View File

@@ -3,6 +3,8 @@ module GF.GFCC.DataGFCC where
import GF.GFCC.CId
import GF.Infra.CompactPrint
import GF.Text.UTF8
import GF.Formalism.FCFG
import GF.Parsing.FCFG.PInfo
import Data.Map
import Data.List
@@ -31,7 +33,8 @@ data Concr = Concr {
lincats :: Map CId Term, -- lin type of a cat
lindefs :: Map CId Term, -- lin default of a cat
printnames :: Map CId Term, -- printname of a cat or a fun
paramlincats :: Map CId Term -- lin type of cat, with printable param names
paramlincats :: Map CId Term, -- lin type of cat, with printable param names
parser :: Maybe FCFPInfo -- parser
}
data Type =
@@ -116,7 +119,6 @@ emptyGFCC = GFCC {
concretes = empty
}
-- default map and filter are for Map here
lmap = Prelude.map
lfilter = Prelude.filter

View File

@@ -2,6 +2,8 @@ module GF.GFCC.Macros where
import GF.GFCC.CId
import GF.GFCC.DataGFCC
import GF.Formalism.FCFG (FGrammar)
import GF.Parsing.FCFG.PInfo (FCFPInfo, fcfPInfoToFGrammar)
----import GF.GFCC.PrintGFCC
import Data.Map
import Data.List
@@ -28,6 +30,12 @@ lookType :: GFCC -> CId -> Type
lookType gfcc f =
fst $ lookMap (error $ "lookType " ++ show f) f (funs (abstract gfcc))
lookParser :: GFCC -> CId -> Maybe FCFPInfo
lookParser gfcc lang = parser $ lookMap (error "no lang") lang $ concretes gfcc
lookFCFG :: GFCC -> CId -> Maybe FGrammar
lookFCFG gfcc lang = fmap fcfPInfoToFGrammar $ lookParser gfcc lang
lookGlobalFlag :: GFCC -> CId -> String
lookGlobalFlag gfcc f =
lookMap "?" f (gflags gfcc)

View File

@@ -3,8 +3,15 @@ module GF.GFCC.Raw.ConvertGFCC (toGFCC,fromGFCC) where
import GF.GFCC.DataGFCC
import GF.GFCC.Raw.AbsGFCCRaw
import GF.Data.Assoc
import GF.Formalism.FCFG
import GF.Formalism.Utilities (NameProfile(..), Profile(..), SyntaxForest(..))
import GF.Parsing.FCFG.PInfo (FCFPInfo(..))
import qualified Data.Array as Array
import Data.Map
-- convert parsed grammar to internal GFCC
toGFCC :: Grammar -> GFCC
@@ -31,29 +38,88 @@ toGFCC (Grm [
catfuns = fromAscList
[(cat,[f | (f, (DTyp _ c _,_)) <- lfuns, c==cat]) | (cat,_) <- lcats]
in Abstr aflags funs cats catfuns,
concretes = fromAscList (lmap mkCnc ccs)
concretes = fromAscList [(lang, toConcr ts) | App lang ts <- ccs]
}
where
mkCnc (
App lang [
App (CId "flags") fls,
App (CId "lin") ls,
App (CId "oper") ops,
App (CId "lincat") lincs,
App (CId "lindef") linds,
App (CId "printname") prns,
App (CId "param") params
]) = (lang,
Concr {
cflags = fromAscList [(f,v) | App f [AStr v] <- fls],
lins = fromAscList [(f,toTerm v) | App f [v] <- ls],
opers = fromAscList [(f,toTerm v) | App f [v] <- ops],
lincats = fromAscList [(f,toTerm v) | App f [v] <- lincs],
lindefs = fromAscList [(f,toTerm v) | App f [v] <- linds],
printnames = fromAscList [(f,toTerm v) | App f [v] <- prns],
paramlincats = fromAscList [(f,toTerm v) | App f [v] <- params]
}
)
toConcr :: [RExp] -> Concr
toConcr = foldl add (Concr {
cflags = empty,
lins = empty,
opers = empty,
lincats = empty,
lindefs = empty,
printnames = empty,
paramlincats = empty,
parser = Nothing
})
where
add :: Concr -> RExp -> Concr
add cnc (App (CId "flags") ts) = cnc { cflags = fromAscList [(f,v) | App f [AStr v] <- ts] }
add cnc (App (CId "lin") ts) = cnc { lins = mkTermMap ts }
add cnc (App (CId "oper") ts) = cnc { opers = mkTermMap ts }
add cnc (App (CId "lincat") ts) = cnc { lincats = mkTermMap ts }
add cnc (App (CId "lindef") ts) = cnc { lindefs = mkTermMap ts }
add cnc (App (CId "printname") ts) = cnc { printnames = mkTermMap ts }
add cnc (App (CId "param") ts) = cnc { paramlincats = mkTermMap ts }
add cnc (App (CId "parser") ts) = cnc { parser = Just (toPInfo ts) }
toPInfo :: [RExp] -> FCFPInfo
toPInfo = foldl add (FCFPInfo {
allRules = error "FCFPInfo.allRules",
topdownRules = error "FCFPInfo.topdownRules",
epsilonRules = error "FCFPInfo.epsilonRules",
leftcornerCats = error "FCFPInfo.leftcornerCats",
leftcornerTokens = error "FCFPInfo.leftcornerTokens",
grammarCats = error "FCFPInfo.grammarCats",
grammarToks = error "FCFPInfo.grammarToks",
startupCats = error "FCFPInfo.startupCats"})
where
add :: FCFPInfo -> RExp -> FCFPInfo
add p (App (CId f) ts) =
case f of
"rules" -> p { allRules = mkArray (lmap toFRule ts) }
"topdownrules" -> p { topdownRules = toAssoc expToInt (lmap expToInt) ts }
"epsilonrules" -> p { epsilonRules = lmap expToInt ts }
"lccats" -> p { leftcornerCats = toAssoc expToInt (lmap expToInt) ts }
"lctoks" -> p { leftcornerTokens = toAssoc expToStr (lmap expToInt) ts }
"cats" -> p { grammarCats = lmap expToInt ts }
"toks" -> p { grammarToks = lmap expToStr ts }
"startupcats" -> p { startupCats = fromList [(c, lmap expToInt cs) | App c cs <- ts] }
toFRule :: RExp -> FRule
toFRule (App (CId "rule")
[n,
App (CId "cats") (rt:at),
App (CId "R") ls]) = FRule name args res lins
where
name = toFName n
args = lmap expToInt at
res = expToInt rt
lins = mkArray [mkArray [toSymbol s | s <- l] | App (CId "S") l <- ls]
toFName :: RExp -> FName
toFName (App (CId "_A") [x]) = Name (CId "_") [Unify [expToInt x]]
toFName (App f ts) = Name f (lmap toProfile ts)
where
toProfile :: RExp -> Profile (SyntaxForest CId)
toProfile AMet = Unify []
toProfile (App (CId "_A") [t]) = Unify [expToInt t]
toProfile (App (CId "_U") ts) = Unify [expToInt t | App (CId "_A") [t] <- ts]
toProfile t = Constant (toSyntaxForest t)
toSyntaxForest :: RExp -> SyntaxForest CId
toSyntaxForest AMet = FMeta
toSyntaxForest (App n ts) = FNode n [lmap toSyntaxForest ts]
toSyntaxForest (AStr s) = FString s
toSyntaxForest (AInt i) = FInt i
toSyntaxForest (AFlt f) = FFloat f
toSymbol :: RExp -> FSymbol
toSymbol (App (CId "proj") [c,n,l]) = FSymCat (expToInt c) (expToInt l) (expToInt n)
toSymbol (AStr t) = FSymTok t
toAssoc :: Ord a => (RExp -> a) -> ([RExp] -> b) -> [RExp] -> Assoc a b
toAssoc f g xs = listAssoc [(f k, g v) | App (CId "map") (k:v) <- xs]
toType :: RExp -> Type
toType e = case e of
@@ -120,7 +186,7 @@ fromGFCC gfcc0 = Grm [
app "lindef" [App f [fromTerm v] | (f,v) <- toList (lindefs cnc)],
app "printname" [App f [fromTerm v] | (f,v) <- toList (printnames cnc)],
app "param" [App f [fromTerm v] | (f,v) <- toList (paramlincats cnc)]
]
] ++ maybe [] (\p -> [fromPInfo p]) (parser cnc)
fromType :: Type -> RExp
fromType e = case e of
@@ -163,3 +229,74 @@ fromTerm e = case e of
where
app = App . CId
str v = app "S" (lmap AStr v)
-- ** Parsing info
fromPInfo :: FCFPInfo -> RExp
fromPInfo p = app "parser" [
app "rules" [fromFRule rule | rule <- Array.elems (allRules p)],
app "topdownrules" (fromAssoc intToExp (lmap intToExp) (topdownRules p)),
app "epsilonrules" (lmap intToExp (epsilonRules p)),
app "lccats" (fromAssoc intToExp (lmap intToExp) (leftcornerCats p)),
app "lctoks" (fromAssoc AStr (lmap intToExp) (leftcornerTokens p)),
app "cats" (lmap intToExp (grammarCats p)),
app "toks" (lmap AStr (grammarToks p)),
app "startupcats" [App f (lmap intToExp cs) | (f,cs) <- toList (startupCats p)]
]
fromAssoc :: Ord a => (a -> RExp) -> (b -> [RExp]) -> Assoc a b -> [RExp]
fromAssoc f g xs = [app "map" (f x:g y) | (x,y) <- aAssocs xs]
fromFRule :: FRule -> RExp
fromFRule (FRule n args res lins) =
app "rule" [fromFName n,
app "cats" (intToExp res:lmap intToExp args),
app "R" [app "S" [fromSymbol s | s <- Array.elems l] | l <- Array.elems lins]
]
fromFName :: FName -> RExp
fromFName n = case n of
Name (CId "_") [p] -> fromProfile p
Name f ps -> App f (lmap fromProfile ps)
where
fromProfile :: Profile (SyntaxForest CId) -> RExp
fromProfile (Unify []) = AMet
fromProfile (Unify [x]) = daughter x
fromProfile (Unify args) = app "_U" (lmap daughter args)
fromProfile (Constant forest) = fromSyntaxForest forest
daughter n = app "_A" [intToExp n]
fromSyntaxForest :: SyntaxForest CId -> RExp
fromSyntaxForest FMeta = AMet
-- FIXME: is there always just one element here?
fromSyntaxForest (FNode n [args]) = App n (lmap fromSyntaxForest args)
fromSyntaxForest (FString s) = AStr s
fromSyntaxForest (FInt i) = AInt i
fromSyntaxForest (FFloat f) = AFlt f
fromSymbol :: FSymbol -> RExp
fromSymbol (FSymCat c l n) = app "proj" [intToExp c, intToExp n, intToExp l]
fromSymbol (FSymTok t) = AStr t
-- ** Utilities
mkTermMap :: [RExp] -> Map CId Term
mkTermMap ts = fromAscList [(f,toTerm v) | App f [v] <- ts]
app :: String -> [RExp] -> RExp
app = App . CId
mkArray :: [a] -> Array.Array Int a
mkArray xs = Array.listArray (0, length xs - 1) xs
expToInt :: Integral a => RExp -> a
expToInt (App (CId "neg") [AInt i]) = fromIntegral (negate i)
expToInt (AInt i) = fromIntegral i
expToStr :: RExp -> String
expToStr (AStr s) = s
intToExp :: Integral a => a -> RExp
intToExp x | x < 0 = App (CId "neg") [AInt (fromIntegral (negate x))]
| otherwise = AInt (fromIntegral x)