forked from GitHub/gf-core
Store FCFPInfo (all information needed for FCFG parsing) in GFCC files, and in the internal DataGFCC.GFCC structure. The parsing information format is still in flux.
This commit is contained in:
@@ -27,7 +27,6 @@ import GF.Command.PPrTree
|
||||
import GF.Data.ErrM
|
||||
|
||||
import GF.Parsing.FCFG
|
||||
import GF.Conversion.SimpleToFCFG (convertGrammar)
|
||||
|
||||
--import GF.Data.Operations
|
||||
--import GF.Infra.UseIO
|
||||
@@ -44,7 +43,7 @@ import System.Directory (doesFileExist)
|
||||
-- Interface
|
||||
---------------------------------------------------
|
||||
|
||||
data MultiGrammar = MultiGrammar {gfcc :: GFCC, parsers :: [(Language,FCFPInfo)]}
|
||||
data MultiGrammar = MultiGrammar {gfcc :: GFCC}
|
||||
type Language = String
|
||||
type Category = String
|
||||
type Tree = Exp
|
||||
@@ -77,10 +76,7 @@ startCat :: MultiGrammar -> Category
|
||||
|
||||
file2grammar f = do
|
||||
gfcc <- file2gfcc f
|
||||
return (MultiGrammar gfcc (gfcc2parsers gfcc))
|
||||
|
||||
gfcc2parsers gfcc =
|
||||
[(lang, buildFCFPInfo fcfg) | (CId lang,fcfg) <- convertGrammar gfcc]
|
||||
return (MultiGrammar gfcc)
|
||||
|
||||
file2gfcc f = do
|
||||
s <- readFileIf f
|
||||
@@ -90,7 +86,7 @@ file2gfcc f = do
|
||||
linearize mgr lang = GF.GFCC.Linearize.linearize (gfcc mgr) (CId lang)
|
||||
|
||||
parse mgr lang cat s =
|
||||
case lookup lang (parsers mgr) of
|
||||
case lookParser (gfcc mgr) (CId lang) of
|
||||
Nothing -> error "no parser"
|
||||
Just pinfo -> case parseFCF "bottomup" pinfo (CId cat) (words s) of
|
||||
Ok x -> x
|
||||
@@ -126,7 +122,7 @@ categories mgr = [c | CId c <- Map.keys (cats (abstract (gfcc mgr)))]
|
||||
|
||||
startCat mgr = "S" ----
|
||||
|
||||
emptyMultiGrammar = MultiGrammar emptyGFCC []
|
||||
emptyMultiGrammar = MultiGrammar emptyGFCC
|
||||
|
||||
------------ for internal use only
|
||||
|
||||
|
||||
@@ -3,6 +3,8 @@ module GF.GFCC.DataGFCC where
|
||||
import GF.GFCC.CId
|
||||
import GF.Infra.CompactPrint
|
||||
import GF.Text.UTF8
|
||||
import GF.Formalism.FCFG
|
||||
import GF.Parsing.FCFG.PInfo
|
||||
|
||||
import Data.Map
|
||||
import Data.List
|
||||
@@ -31,7 +33,8 @@ data Concr = Concr {
|
||||
lincats :: Map CId Term, -- lin type of a cat
|
||||
lindefs :: Map CId Term, -- lin default of a cat
|
||||
printnames :: Map CId Term, -- printname of a cat or a fun
|
||||
paramlincats :: Map CId Term -- lin type of cat, with printable param names
|
||||
paramlincats :: Map CId Term, -- lin type of cat, with printable param names
|
||||
parser :: Maybe FCFPInfo -- parser
|
||||
}
|
||||
|
||||
data Type =
|
||||
@@ -116,7 +119,6 @@ emptyGFCC = GFCC {
|
||||
concretes = empty
|
||||
}
|
||||
|
||||
|
||||
-- default map and filter are for Map here
|
||||
lmap = Prelude.map
|
||||
lfilter = Prelude.filter
|
||||
|
||||
@@ -2,6 +2,8 @@ module GF.GFCC.Macros where
|
||||
|
||||
import GF.GFCC.CId
|
||||
import GF.GFCC.DataGFCC
|
||||
import GF.Formalism.FCFG (FGrammar)
|
||||
import GF.Parsing.FCFG.PInfo (FCFPInfo, fcfPInfoToFGrammar)
|
||||
----import GF.GFCC.PrintGFCC
|
||||
import Data.Map
|
||||
import Data.List
|
||||
@@ -28,6 +30,12 @@ lookType :: GFCC -> CId -> Type
|
||||
lookType gfcc f =
|
||||
fst $ lookMap (error $ "lookType " ++ show f) f (funs (abstract gfcc))
|
||||
|
||||
lookParser :: GFCC -> CId -> Maybe FCFPInfo
|
||||
lookParser gfcc lang = parser $ lookMap (error "no lang") lang $ concretes gfcc
|
||||
|
||||
lookFCFG :: GFCC -> CId -> Maybe FGrammar
|
||||
lookFCFG gfcc lang = fmap fcfPInfoToFGrammar $ lookParser gfcc lang
|
||||
|
||||
lookGlobalFlag :: GFCC -> CId -> String
|
||||
lookGlobalFlag gfcc f =
|
||||
lookMap "?" f (gflags gfcc)
|
||||
|
||||
@@ -3,8 +3,15 @@ module GF.GFCC.Raw.ConvertGFCC (toGFCC,fromGFCC) where
|
||||
import GF.GFCC.DataGFCC
|
||||
import GF.GFCC.Raw.AbsGFCCRaw
|
||||
|
||||
import GF.Data.Assoc
|
||||
import GF.Formalism.FCFG
|
||||
import GF.Formalism.Utilities (NameProfile(..), Profile(..), SyntaxForest(..))
|
||||
import GF.Parsing.FCFG.PInfo (FCFPInfo(..))
|
||||
|
||||
import qualified Data.Array as Array
|
||||
import Data.Map
|
||||
|
||||
|
||||
-- convert parsed grammar to internal GFCC
|
||||
|
||||
toGFCC :: Grammar -> GFCC
|
||||
@@ -31,29 +38,88 @@ toGFCC (Grm [
|
||||
catfuns = fromAscList
|
||||
[(cat,[f | (f, (DTyp _ c _,_)) <- lfuns, c==cat]) | (cat,_) <- lcats]
|
||||
in Abstr aflags funs cats catfuns,
|
||||
concretes = fromAscList (lmap mkCnc ccs)
|
||||
concretes = fromAscList [(lang, toConcr ts) | App lang ts <- ccs]
|
||||
}
|
||||
where
|
||||
mkCnc (
|
||||
App lang [
|
||||
App (CId "flags") fls,
|
||||
App (CId "lin") ls,
|
||||
App (CId "oper") ops,
|
||||
App (CId "lincat") lincs,
|
||||
App (CId "lindef") linds,
|
||||
App (CId "printname") prns,
|
||||
App (CId "param") params
|
||||
]) = (lang,
|
||||
Concr {
|
||||
cflags = fromAscList [(f,v) | App f [AStr v] <- fls],
|
||||
lins = fromAscList [(f,toTerm v) | App f [v] <- ls],
|
||||
opers = fromAscList [(f,toTerm v) | App f [v] <- ops],
|
||||
lincats = fromAscList [(f,toTerm v) | App f [v] <- lincs],
|
||||
lindefs = fromAscList [(f,toTerm v) | App f [v] <- linds],
|
||||
printnames = fromAscList [(f,toTerm v) | App f [v] <- prns],
|
||||
paramlincats = fromAscList [(f,toTerm v) | App f [v] <- params]
|
||||
}
|
||||
)
|
||||
|
||||
toConcr :: [RExp] -> Concr
|
||||
toConcr = foldl add (Concr {
|
||||
cflags = empty,
|
||||
lins = empty,
|
||||
opers = empty,
|
||||
lincats = empty,
|
||||
lindefs = empty,
|
||||
printnames = empty,
|
||||
paramlincats = empty,
|
||||
parser = Nothing
|
||||
})
|
||||
where
|
||||
add :: Concr -> RExp -> Concr
|
||||
add cnc (App (CId "flags") ts) = cnc { cflags = fromAscList [(f,v) | App f [AStr v] <- ts] }
|
||||
add cnc (App (CId "lin") ts) = cnc { lins = mkTermMap ts }
|
||||
add cnc (App (CId "oper") ts) = cnc { opers = mkTermMap ts }
|
||||
add cnc (App (CId "lincat") ts) = cnc { lincats = mkTermMap ts }
|
||||
add cnc (App (CId "lindef") ts) = cnc { lindefs = mkTermMap ts }
|
||||
add cnc (App (CId "printname") ts) = cnc { printnames = mkTermMap ts }
|
||||
add cnc (App (CId "param") ts) = cnc { paramlincats = mkTermMap ts }
|
||||
add cnc (App (CId "parser") ts) = cnc { parser = Just (toPInfo ts) }
|
||||
|
||||
toPInfo :: [RExp] -> FCFPInfo
|
||||
toPInfo = foldl add (FCFPInfo {
|
||||
allRules = error "FCFPInfo.allRules",
|
||||
topdownRules = error "FCFPInfo.topdownRules",
|
||||
epsilonRules = error "FCFPInfo.epsilonRules",
|
||||
leftcornerCats = error "FCFPInfo.leftcornerCats",
|
||||
leftcornerTokens = error "FCFPInfo.leftcornerTokens",
|
||||
grammarCats = error "FCFPInfo.grammarCats",
|
||||
grammarToks = error "FCFPInfo.grammarToks",
|
||||
startupCats = error "FCFPInfo.startupCats"})
|
||||
where
|
||||
add :: FCFPInfo -> RExp -> FCFPInfo
|
||||
add p (App (CId f) ts) =
|
||||
case f of
|
||||
"rules" -> p { allRules = mkArray (lmap toFRule ts) }
|
||||
"topdownrules" -> p { topdownRules = toAssoc expToInt (lmap expToInt) ts }
|
||||
"epsilonrules" -> p { epsilonRules = lmap expToInt ts }
|
||||
"lccats" -> p { leftcornerCats = toAssoc expToInt (lmap expToInt) ts }
|
||||
"lctoks" -> p { leftcornerTokens = toAssoc expToStr (lmap expToInt) ts }
|
||||
"cats" -> p { grammarCats = lmap expToInt ts }
|
||||
"toks" -> p { grammarToks = lmap expToStr ts }
|
||||
"startupcats" -> p { startupCats = fromList [(c, lmap expToInt cs) | App c cs <- ts] }
|
||||
toFRule :: RExp -> FRule
|
||||
toFRule (App (CId "rule")
|
||||
[n,
|
||||
App (CId "cats") (rt:at),
|
||||
App (CId "R") ls]) = FRule name args res lins
|
||||
where
|
||||
name = toFName n
|
||||
args = lmap expToInt at
|
||||
res = expToInt rt
|
||||
lins = mkArray [mkArray [toSymbol s | s <- l] | App (CId "S") l <- ls]
|
||||
|
||||
toFName :: RExp -> FName
|
||||
toFName (App (CId "_A") [x]) = Name (CId "_") [Unify [expToInt x]]
|
||||
toFName (App f ts) = Name f (lmap toProfile ts)
|
||||
where
|
||||
toProfile :: RExp -> Profile (SyntaxForest CId)
|
||||
toProfile AMet = Unify []
|
||||
toProfile (App (CId "_A") [t]) = Unify [expToInt t]
|
||||
toProfile (App (CId "_U") ts) = Unify [expToInt t | App (CId "_A") [t] <- ts]
|
||||
toProfile t = Constant (toSyntaxForest t)
|
||||
|
||||
toSyntaxForest :: RExp -> SyntaxForest CId
|
||||
toSyntaxForest AMet = FMeta
|
||||
toSyntaxForest (App n ts) = FNode n [lmap toSyntaxForest ts]
|
||||
toSyntaxForest (AStr s) = FString s
|
||||
toSyntaxForest (AInt i) = FInt i
|
||||
toSyntaxForest (AFlt f) = FFloat f
|
||||
|
||||
toSymbol :: RExp -> FSymbol
|
||||
toSymbol (App (CId "proj") [c,n,l]) = FSymCat (expToInt c) (expToInt l) (expToInt n)
|
||||
toSymbol (AStr t) = FSymTok t
|
||||
|
||||
toAssoc :: Ord a => (RExp -> a) -> ([RExp] -> b) -> [RExp] -> Assoc a b
|
||||
toAssoc f g xs = listAssoc [(f k, g v) | App (CId "map") (k:v) <- xs]
|
||||
|
||||
toType :: RExp -> Type
|
||||
toType e = case e of
|
||||
@@ -120,7 +186,7 @@ fromGFCC gfcc0 = Grm [
|
||||
app "lindef" [App f [fromTerm v] | (f,v) <- toList (lindefs cnc)],
|
||||
app "printname" [App f [fromTerm v] | (f,v) <- toList (printnames cnc)],
|
||||
app "param" [App f [fromTerm v] | (f,v) <- toList (paramlincats cnc)]
|
||||
]
|
||||
] ++ maybe [] (\p -> [fromPInfo p]) (parser cnc)
|
||||
|
||||
fromType :: Type -> RExp
|
||||
fromType e = case e of
|
||||
@@ -163,3 +229,74 @@ fromTerm e = case e of
|
||||
where
|
||||
app = App . CId
|
||||
str v = app "S" (lmap AStr v)
|
||||
|
||||
-- ** Parsing info
|
||||
|
||||
fromPInfo :: FCFPInfo -> RExp
|
||||
fromPInfo p = app "parser" [
|
||||
app "rules" [fromFRule rule | rule <- Array.elems (allRules p)],
|
||||
app "topdownrules" (fromAssoc intToExp (lmap intToExp) (topdownRules p)),
|
||||
app "epsilonrules" (lmap intToExp (epsilonRules p)),
|
||||
app "lccats" (fromAssoc intToExp (lmap intToExp) (leftcornerCats p)),
|
||||
app "lctoks" (fromAssoc AStr (lmap intToExp) (leftcornerTokens p)),
|
||||
app "cats" (lmap intToExp (grammarCats p)),
|
||||
app "toks" (lmap AStr (grammarToks p)),
|
||||
app "startupcats" [App f (lmap intToExp cs) | (f,cs) <- toList (startupCats p)]
|
||||
]
|
||||
|
||||
fromAssoc :: Ord a => (a -> RExp) -> (b -> [RExp]) -> Assoc a b -> [RExp]
|
||||
fromAssoc f g xs = [app "map" (f x:g y) | (x,y) <- aAssocs xs]
|
||||
|
||||
fromFRule :: FRule -> RExp
|
||||
fromFRule (FRule n args res lins) =
|
||||
app "rule" [fromFName n,
|
||||
app "cats" (intToExp res:lmap intToExp args),
|
||||
app "R" [app "S" [fromSymbol s | s <- Array.elems l] | l <- Array.elems lins]
|
||||
]
|
||||
|
||||
fromFName :: FName -> RExp
|
||||
fromFName n = case n of
|
||||
Name (CId "_") [p] -> fromProfile p
|
||||
Name f ps -> App f (lmap fromProfile ps)
|
||||
where
|
||||
fromProfile :: Profile (SyntaxForest CId) -> RExp
|
||||
fromProfile (Unify []) = AMet
|
||||
fromProfile (Unify [x]) = daughter x
|
||||
fromProfile (Unify args) = app "_U" (lmap daughter args)
|
||||
fromProfile (Constant forest) = fromSyntaxForest forest
|
||||
|
||||
daughter n = app "_A" [intToExp n]
|
||||
|
||||
fromSyntaxForest :: SyntaxForest CId -> RExp
|
||||
fromSyntaxForest FMeta = AMet
|
||||
-- FIXME: is there always just one element here?
|
||||
fromSyntaxForest (FNode n [args]) = App n (lmap fromSyntaxForest args)
|
||||
fromSyntaxForest (FString s) = AStr s
|
||||
fromSyntaxForest (FInt i) = AInt i
|
||||
fromSyntaxForest (FFloat f) = AFlt f
|
||||
|
||||
fromSymbol :: FSymbol -> RExp
|
||||
fromSymbol (FSymCat c l n) = app "proj" [intToExp c, intToExp n, intToExp l]
|
||||
fromSymbol (FSymTok t) = AStr t
|
||||
|
||||
-- ** Utilities
|
||||
|
||||
mkTermMap :: [RExp] -> Map CId Term
|
||||
mkTermMap ts = fromAscList [(f,toTerm v) | App f [v] <- ts]
|
||||
|
||||
app :: String -> [RExp] -> RExp
|
||||
app = App . CId
|
||||
|
||||
mkArray :: [a] -> Array.Array Int a
|
||||
mkArray xs = Array.listArray (0, length xs - 1) xs
|
||||
|
||||
expToInt :: Integral a => RExp -> a
|
||||
expToInt (App (CId "neg") [AInt i]) = fromIntegral (negate i)
|
||||
expToInt (AInt i) = fromIntegral i
|
||||
|
||||
expToStr :: RExp -> String
|
||||
expToStr (AStr s) = s
|
||||
|
||||
intToExp :: Integral a => a -> RExp
|
||||
intToExp x | x < 0 = App (CId "neg") [AInt (fromIntegral (negate x))]
|
||||
| otherwise = AInt (fromIntegral x)
|
||||
|
||||
Reference in New Issue
Block a user