1
0
forked from GitHub/gf-core

Store FCFPInfo (all information needed for FCFG parsing) in GFCC files, and in the internal DataGFCC.GFCC structure. The parsing information format is still in flux.

This commit is contained in:
bringert
2008-01-03 17:10:05 +00:00
parent 43ddb41d31
commit af1a3a2473
11 changed files with 203 additions and 55 deletions

View File

@@ -3,8 +3,15 @@ module GF.GFCC.Raw.ConvertGFCC (toGFCC,fromGFCC) where
import GF.GFCC.DataGFCC
import GF.GFCC.Raw.AbsGFCCRaw
import GF.Data.Assoc
import GF.Formalism.FCFG
import GF.Formalism.Utilities (NameProfile(..), Profile(..), SyntaxForest(..))
import GF.Parsing.FCFG.PInfo (FCFPInfo(..))
import qualified Data.Array as Array
import Data.Map
-- convert parsed grammar to internal GFCC
toGFCC :: Grammar -> GFCC
@@ -31,29 +38,88 @@ toGFCC (Grm [
catfuns = fromAscList
[(cat,[f | (f, (DTyp _ c _,_)) <- lfuns, c==cat]) | (cat,_) <- lcats]
in Abstr aflags funs cats catfuns,
concretes = fromAscList (lmap mkCnc ccs)
concretes = fromAscList [(lang, toConcr ts) | App lang ts <- ccs]
}
where
mkCnc (
App lang [
App (CId "flags") fls,
App (CId "lin") ls,
App (CId "oper") ops,
App (CId "lincat") lincs,
App (CId "lindef") linds,
App (CId "printname") prns,
App (CId "param") params
]) = (lang,
Concr {
cflags = fromAscList [(f,v) | App f [AStr v] <- fls],
lins = fromAscList [(f,toTerm v) | App f [v] <- ls],
opers = fromAscList [(f,toTerm v) | App f [v] <- ops],
lincats = fromAscList [(f,toTerm v) | App f [v] <- lincs],
lindefs = fromAscList [(f,toTerm v) | App f [v] <- linds],
printnames = fromAscList [(f,toTerm v) | App f [v] <- prns],
paramlincats = fromAscList [(f,toTerm v) | App f [v] <- params]
}
)
toConcr :: [RExp] -> Concr
toConcr = foldl add (Concr {
cflags = empty,
lins = empty,
opers = empty,
lincats = empty,
lindefs = empty,
printnames = empty,
paramlincats = empty,
parser = Nothing
})
where
add :: Concr -> RExp -> Concr
add cnc (App (CId "flags") ts) = cnc { cflags = fromAscList [(f,v) | App f [AStr v] <- ts] }
add cnc (App (CId "lin") ts) = cnc { lins = mkTermMap ts }
add cnc (App (CId "oper") ts) = cnc { opers = mkTermMap ts }
add cnc (App (CId "lincat") ts) = cnc { lincats = mkTermMap ts }
add cnc (App (CId "lindef") ts) = cnc { lindefs = mkTermMap ts }
add cnc (App (CId "printname") ts) = cnc { printnames = mkTermMap ts }
add cnc (App (CId "param") ts) = cnc { paramlincats = mkTermMap ts }
add cnc (App (CId "parser") ts) = cnc { parser = Just (toPInfo ts) }
toPInfo :: [RExp] -> FCFPInfo
toPInfo = foldl add (FCFPInfo {
allRules = error "FCFPInfo.allRules",
topdownRules = error "FCFPInfo.topdownRules",
epsilonRules = error "FCFPInfo.epsilonRules",
leftcornerCats = error "FCFPInfo.leftcornerCats",
leftcornerTokens = error "FCFPInfo.leftcornerTokens",
grammarCats = error "FCFPInfo.grammarCats",
grammarToks = error "FCFPInfo.grammarToks",
startupCats = error "FCFPInfo.startupCats"})
where
add :: FCFPInfo -> RExp -> FCFPInfo
add p (App (CId f) ts) =
case f of
"rules" -> p { allRules = mkArray (lmap toFRule ts) }
"topdownrules" -> p { topdownRules = toAssoc expToInt (lmap expToInt) ts }
"epsilonrules" -> p { epsilonRules = lmap expToInt ts }
"lccats" -> p { leftcornerCats = toAssoc expToInt (lmap expToInt) ts }
"lctoks" -> p { leftcornerTokens = toAssoc expToStr (lmap expToInt) ts }
"cats" -> p { grammarCats = lmap expToInt ts }
"toks" -> p { grammarToks = lmap expToStr ts }
"startupcats" -> p { startupCats = fromList [(c, lmap expToInt cs) | App c cs <- ts] }
toFRule :: RExp -> FRule
toFRule (App (CId "rule")
[n,
App (CId "cats") (rt:at),
App (CId "R") ls]) = FRule name args res lins
where
name = toFName n
args = lmap expToInt at
res = expToInt rt
lins = mkArray [mkArray [toSymbol s | s <- l] | App (CId "S") l <- ls]
toFName :: RExp -> FName
toFName (App (CId "_A") [x]) = Name (CId "_") [Unify [expToInt x]]
toFName (App f ts) = Name f (lmap toProfile ts)
where
toProfile :: RExp -> Profile (SyntaxForest CId)
toProfile AMet = Unify []
toProfile (App (CId "_A") [t]) = Unify [expToInt t]
toProfile (App (CId "_U") ts) = Unify [expToInt t | App (CId "_A") [t] <- ts]
toProfile t = Constant (toSyntaxForest t)
toSyntaxForest :: RExp -> SyntaxForest CId
toSyntaxForest AMet = FMeta
toSyntaxForest (App n ts) = FNode n [lmap toSyntaxForest ts]
toSyntaxForest (AStr s) = FString s
toSyntaxForest (AInt i) = FInt i
toSyntaxForest (AFlt f) = FFloat f
toSymbol :: RExp -> FSymbol
toSymbol (App (CId "proj") [c,n,l]) = FSymCat (expToInt c) (expToInt l) (expToInt n)
toSymbol (AStr t) = FSymTok t
toAssoc :: Ord a => (RExp -> a) -> ([RExp] -> b) -> [RExp] -> Assoc a b
toAssoc f g xs = listAssoc [(f k, g v) | App (CId "map") (k:v) <- xs]
toType :: RExp -> Type
toType e = case e of
@@ -120,7 +186,7 @@ fromGFCC gfcc0 = Grm [
app "lindef" [App f [fromTerm v] | (f,v) <- toList (lindefs cnc)],
app "printname" [App f [fromTerm v] | (f,v) <- toList (printnames cnc)],
app "param" [App f [fromTerm v] | (f,v) <- toList (paramlincats cnc)]
]
] ++ maybe [] (\p -> [fromPInfo p]) (parser cnc)
fromType :: Type -> RExp
fromType e = case e of
@@ -163,3 +229,74 @@ fromTerm e = case e of
where
app = App . CId
str v = app "S" (lmap AStr v)
-- ** Parsing info
fromPInfo :: FCFPInfo -> RExp
fromPInfo p = app "parser" [
app "rules" [fromFRule rule | rule <- Array.elems (allRules p)],
app "topdownrules" (fromAssoc intToExp (lmap intToExp) (topdownRules p)),
app "epsilonrules" (lmap intToExp (epsilonRules p)),
app "lccats" (fromAssoc intToExp (lmap intToExp) (leftcornerCats p)),
app "lctoks" (fromAssoc AStr (lmap intToExp) (leftcornerTokens p)),
app "cats" (lmap intToExp (grammarCats p)),
app "toks" (lmap AStr (grammarToks p)),
app "startupcats" [App f (lmap intToExp cs) | (f,cs) <- toList (startupCats p)]
]
fromAssoc :: Ord a => (a -> RExp) -> (b -> [RExp]) -> Assoc a b -> [RExp]
fromAssoc f g xs = [app "map" (f x:g y) | (x,y) <- aAssocs xs]
fromFRule :: FRule -> RExp
fromFRule (FRule n args res lins) =
app "rule" [fromFName n,
app "cats" (intToExp res:lmap intToExp args),
app "R" [app "S" [fromSymbol s | s <- Array.elems l] | l <- Array.elems lins]
]
fromFName :: FName -> RExp
fromFName n = case n of
Name (CId "_") [p] -> fromProfile p
Name f ps -> App f (lmap fromProfile ps)
where
fromProfile :: Profile (SyntaxForest CId) -> RExp
fromProfile (Unify []) = AMet
fromProfile (Unify [x]) = daughter x
fromProfile (Unify args) = app "_U" (lmap daughter args)
fromProfile (Constant forest) = fromSyntaxForest forest
daughter n = app "_A" [intToExp n]
fromSyntaxForest :: SyntaxForest CId -> RExp
fromSyntaxForest FMeta = AMet
-- FIXME: is there always just one element here?
fromSyntaxForest (FNode n [args]) = App n (lmap fromSyntaxForest args)
fromSyntaxForest (FString s) = AStr s
fromSyntaxForest (FInt i) = AInt i
fromSyntaxForest (FFloat f) = AFlt f
fromSymbol :: FSymbol -> RExp
fromSymbol (FSymCat c l n) = app "proj" [intToExp c, intToExp n, intToExp l]
fromSymbol (FSymTok t) = AStr t
-- ** Utilities
mkTermMap :: [RExp] -> Map CId Term
mkTermMap ts = fromAscList [(f,toTerm v) | App f [v] <- ts]
app :: String -> [RExp] -> RExp
app = App . CId
mkArray :: [a] -> Array.Array Int a
mkArray xs = Array.listArray (0, length xs - 1) xs
expToInt :: Integral a => RExp -> a
expToInt (App (CId "neg") [AInt i]) = fromIntegral (negate i)
expToInt (AInt i) = fromIntegral i
expToStr :: RExp -> String
expToStr (AStr s) = s
intToExp :: Integral a => a -> RExp
intToExp x | x < 0 = App (CId "neg") [AInt (fromIntegral (negate x))]
| otherwise = AInt (fromIntegral x)