diff --git a/src/GF/Canon/CanonToGFCC.hs b/src/GF/Canon/CanonToGFCC.hs index 8d5656aa4..044ea3669 100644 --- a/src/GF/Canon/CanonToGFCC.hs +++ b/src/GF/Canon/CanonToGFCC.hs @@ -25,7 +25,7 @@ source2gfcc opts gf = let (abs,gfcc) = mkCanon2gfcc opts (gfcabs gf) gf gfcc1 = maybe undefined id $ checkGFCCmaybe gfcc - in if oElem (iOpt "noopt") opts then gfcc1 else optGFCC gfcc1 + in addParsers $ if oElem (iOpt "noopt") opts then gfcc1 else optGFCC gfcc1 gfcabs gfc = prt $ head $ M.allConcretes gfc $ maybe (error "no abstract") id $ diff --git a/src/GF/Command/Importing.hs b/src/GF/Command/Importing.hs index 676eec37f..788dab20a 100644 --- a/src/GF/Command/Importing.hs +++ b/src/GF/Command/Importing.hs @@ -22,10 +22,8 @@ importGrammar mgr0 opts files = do let name = justModuleName (last files) let (abs,gfcc0) = mkCanon2gfcc opts name gr gfcc1 <- checkGFCCio gfcc0 - return $ if oElem (iOpt "noopt") opts then gfcc1 else optGFCC gfcc1 + return $ addParsers $ if oElem (iOpt "noopt") opts then gfcc1 else optGFCC gfcc1 "gfcc" -> mapM file2gfcc files >>= return . foldl1 unionGFCC let gfcc3 = unionGFCC (gfcc mgr0) gfcc2 - return $ MultiGrammar gfcc3 - (nubBy (\ (x,_) (y,_) -> x == y) (gfcc2parsers gfcc3 ++ parsers mgr0)) - -- later coming parsers override + return $ MultiGrammar gfcc3 \ No newline at end of file diff --git a/src/GF/Compile/ShellState.hs b/src/GF/Compile/ShellState.hs index e2e5486ca..0e24da601 100644 --- a/src/GF/Compile/ShellState.hs +++ b/src/GF/Compile/ShellState.hs @@ -19,6 +19,7 @@ import GF.Canon.GFC import GF.Canon.AbsGFC import GF.GFCC.CId --import GF.GFCC.DataGFCC(mkGFCC) +import GF.GFCC.Macros (lookFCFG) import GF.Canon.CanonToGFCC import GF.Grammar.Macros import GF.Grammar.MMacros @@ -263,9 +264,8 @@ updateShellState opts ign mcnc sh ((_,sgr,gr,eenv),rts) = do let fromGFC = snd . snd . Cnv.convertGFC opts (mcfgs, cfgs) = unzip $ map (curry fromGFC cgr) concrs - fcfgs0 = [(IC id,g) | (CId id,g) <- - FCnv.convertGrammar (canon2gfcc opts cgr)] ---- UTF8 - fcfgs = [(c,g) | c <- concrs, Just g <- [lookup c fcfgs0]] + gfcc = canon2gfcc opts cgr ---- UTF8 + fcfgs = [(c,g) | c@(IC cn) <- concrs, Just g <- [lookFCFG gfcc (CId cn)]] pInfos = zipWith3 Prs.buildPInfo mcfgs (map snd fcfgs) cfgs let funs = funRulesOf cgr diff --git a/src/GF/Conversion/SimpleToFCFG.hs b/src/GF/Conversion/SimpleToFCFG.hs index 081a2485d..1c5901fcf 100644 --- a/src/GF/Conversion/SimpleToFCFG.hs +++ b/src/GF/Conversion/SimpleToFCFG.hs @@ -13,7 +13,7 @@ module GF.Conversion.SimpleToFCFG - (convertGrammar) where + (convertConcrete) where import GF.Infra.PrintClass @@ -39,19 +39,14 @@ import Data.Maybe ---------------------------------------------------------------------- -- main conversion function -convertGrammar :: GFCC -> [(CId,FGrammar)] -convertGrammar gfcc = - [(cncname,convert abs_defs conc cats) - | cncname <- cncnames gfcc, - cnc <- Map.lookup cncname (concretes gfcc), - let conc = Map.union (opers cnc) (lins cnc) -- "union big+small most efficient" +convertConcrete :: Abstr -> Concr -> FGrammar +convertConcrete abs cnc = convert abs_defs conc cats + where abs_defs = Map.assocs (funs abs) + conc = Map.union (opers cnc) (lins cnc) -- "union big+small most efficient" cats = lincats cnc - ] - where - abs_defs = Map.assocs (funs (abstract gfcc)) - convert :: [(CId,(Type,Exp))] -> TermMap -> TermMap -> FGrammar - convert abs_defs cnc_defs cat_defs = getFGrammar (loop frulesEnv) +convert :: [(CId,(Type,Exp))] -> TermMap -> TermMap -> FGrammar +convert abs_defs cnc_defs cat_defs = getFGrammar (loop frulesEnv) where srules = [ (XRule id args res (map findLinType args) (findLinType res) term) | diff --git a/src/GF/Devel/GFC.hs b/src/GF/Devel/GFC.hs index 85c9328f4..2cb9104c5 100644 --- a/src/GF/Devel/GFC.hs +++ b/src/GF/Devel/GFC.hs @@ -24,7 +24,7 @@ mainGFC xx = do let name = justModuleName (last fs) let (abs,gc0) = mkCanon2gfcc opts name gr gc1 <- checkGFCCio gc0 - let gc = if oElem (iOpt "noopt") opts then gc1 else optGFCC gc1 + let gc = addParsers $ if oElem (iOpt "noopt") opts then gc1 else optGFCC gc1 let target = targetName opts abs let gfccFile = target ++ ".gfcc" writeFile gfccFile (printGFCC gc) diff --git a/src/GF/Devel/GrammarToGFCC.hs b/src/GF/Devel/GrammarToGFCC.hs index b7eaebe31..5b2f4ce17 100644 --- a/src/GF/Devel/GrammarToGFCC.hs +++ b/src/GF/Devel/GrammarToGFCC.hs @@ -1,4 +1,4 @@ -module GF.Devel.GrammarToGFCC (prGrammar2gfcc,mkCanon2gfcc) where +module GF.Devel.GrammarToGFCC (prGrammar2gfcc,mkCanon2gfcc,addParsers) where import GF.Devel.OptimizeGF (unshareModule) @@ -15,6 +15,8 @@ import qualified GF.Grammar.Macros as GM import qualified GF.Infra.Modules as M import qualified GF.Infra.Option as O +import GF.Conversion.SimpleToFCFG (convertConcrete) +import GF.Parsing.FCFG.PInfo (buildFCFPInfo) import GF.Devel.PrGrammar import GF.Devel.PrintGFCC import GF.Devel.ModDeps @@ -41,6 +43,12 @@ mkCanon2gfcc opts cnc gr = abs = err error id $ M.abstractOfConcrete gr (identC cnc) pars = mkParamLincat gr +-- Adds parsers for all concretes +addParsers :: D.GFCC -> D.GFCC +addParsers gfcc = gfcc { D.concretes = Map.map conv (D.concretes gfcc) } + where + conv cnc = cnc { D.parser = Just (buildFCFPInfo (convertConcrete (D.abstract gfcc) cnc)) } + -- Generate GFCC from GFCM. -- this assumes a grammar translated by canon2canon @@ -72,7 +80,7 @@ canon2gfcc opts pars cgr@(M.MGrammar ((a,M.ModMod abm):cms)) = cncs = Map.fromList [mkConcr lang (i2i lang) mo | (lang,M.ModMod mo) <- cms] mkConcr lang0 lang mo = - (lang,D.Concr flags lins opers lincats lindefs printnames params) + (lang,D.Concr flags lins opers lincats lindefs printnames params fcfg) where js = tree2list (M.jments mo) flags = Map.fromList [(CId f,x) | Opt (f,[x]) <- M.flags mo] @@ -90,6 +98,7 @@ canon2gfcc opts pars cgr@(M.MGrammar ((a,M.ModMod abm):cms)) = (Map.fromAscList [(i2i f, mkTerm tr) | (f,CncCat _ _ (Yes tr)) <- js]) params = Map.fromAscList [(i2i c, pars lang0 c) | (c,CncCat (Yes ty) _ _) <- js] + fcfg = Nothing i2i :: Ident -> CId i2i = CId . prIdent diff --git a/src/GF/GFCC/API.hs b/src/GF/GFCC/API.hs index 111857b18..0a3b37cc5 100644 --- a/src/GF/GFCC/API.hs +++ b/src/GF/GFCC/API.hs @@ -27,7 +27,6 @@ import GF.Command.PPrTree import GF.Data.ErrM import GF.Parsing.FCFG -import GF.Conversion.SimpleToFCFG (convertGrammar) --import GF.Data.Operations --import GF.Infra.UseIO @@ -44,7 +43,7 @@ import System.Directory (doesFileExist) -- Interface --------------------------------------------------- -data MultiGrammar = MultiGrammar {gfcc :: GFCC, parsers :: [(Language,FCFPInfo)]} +data MultiGrammar = MultiGrammar {gfcc :: GFCC} type Language = String type Category = String type Tree = Exp @@ -77,10 +76,7 @@ startCat :: MultiGrammar -> Category file2grammar f = do gfcc <- file2gfcc f - return (MultiGrammar gfcc (gfcc2parsers gfcc)) - -gfcc2parsers gfcc = - [(lang, buildFCFPInfo fcfg) | (CId lang,fcfg) <- convertGrammar gfcc] + return (MultiGrammar gfcc) file2gfcc f = do s <- readFileIf f @@ -90,7 +86,7 @@ file2gfcc f = do linearize mgr lang = GF.GFCC.Linearize.linearize (gfcc mgr) (CId lang) parse mgr lang cat s = - case lookup lang (parsers mgr) of + case lookParser (gfcc mgr) (CId lang) of Nothing -> error "no parser" Just pinfo -> case parseFCF "bottomup" pinfo (CId cat) (words s) of Ok x -> x @@ -126,7 +122,7 @@ categories mgr = [c | CId c <- Map.keys (cats (abstract (gfcc mgr)))] startCat mgr = "S" ---- -emptyMultiGrammar = MultiGrammar emptyGFCC [] +emptyMultiGrammar = MultiGrammar emptyGFCC ------------ for internal use only diff --git a/src/GF/GFCC/DataGFCC.hs b/src/GF/GFCC/DataGFCC.hs index 35c8b08c5..89ab28170 100644 --- a/src/GF/GFCC/DataGFCC.hs +++ b/src/GF/GFCC/DataGFCC.hs @@ -3,6 +3,8 @@ module GF.GFCC.DataGFCC where import GF.GFCC.CId import GF.Infra.CompactPrint import GF.Text.UTF8 +import GF.Formalism.FCFG +import GF.Parsing.FCFG.PInfo import Data.Map import Data.List @@ -31,7 +33,8 @@ data Concr = Concr { lincats :: Map CId Term, -- lin type of a cat lindefs :: Map CId Term, -- lin default of a cat printnames :: Map CId Term, -- printname of a cat or a fun - paramlincats :: Map CId Term -- lin type of cat, with printable param names + paramlincats :: Map CId Term, -- lin type of cat, with printable param names + parser :: Maybe FCFPInfo -- parser } data Type = @@ -116,7 +119,6 @@ emptyGFCC = GFCC { concretes = empty } - -- default map and filter are for Map here lmap = Prelude.map lfilter = Prelude.filter diff --git a/src/GF/GFCC/Macros.hs b/src/GF/GFCC/Macros.hs index 3e88952d4..383b77d34 100644 --- a/src/GF/GFCC/Macros.hs +++ b/src/GF/GFCC/Macros.hs @@ -2,6 +2,8 @@ module GF.GFCC.Macros where import GF.GFCC.CId import GF.GFCC.DataGFCC +import GF.Formalism.FCFG (FGrammar) +import GF.Parsing.FCFG.PInfo (FCFPInfo, fcfPInfoToFGrammar) ----import GF.GFCC.PrintGFCC import Data.Map import Data.List @@ -28,6 +30,12 @@ lookType :: GFCC -> CId -> Type lookType gfcc f = fst $ lookMap (error $ "lookType " ++ show f) f (funs (abstract gfcc)) +lookParser :: GFCC -> CId -> Maybe FCFPInfo +lookParser gfcc lang = parser $ lookMap (error "no lang") lang $ concretes gfcc + +lookFCFG :: GFCC -> CId -> Maybe FGrammar +lookFCFG gfcc lang = fmap fcfPInfoToFGrammar $ lookParser gfcc lang + lookGlobalFlag :: GFCC -> CId -> String lookGlobalFlag gfcc f = lookMap "?" f (gflags gfcc) diff --git a/src/GF/GFCC/Raw/ConvertGFCC.hs b/src/GF/GFCC/Raw/ConvertGFCC.hs index 325d6ea6d..0636cf5e1 100644 --- a/src/GF/GFCC/Raw/ConvertGFCC.hs +++ b/src/GF/GFCC/Raw/ConvertGFCC.hs @@ -3,8 +3,15 @@ module GF.GFCC.Raw.ConvertGFCC (toGFCC,fromGFCC) where import GF.GFCC.DataGFCC import GF.GFCC.Raw.AbsGFCCRaw +import GF.Data.Assoc +import GF.Formalism.FCFG +import GF.Formalism.Utilities (NameProfile(..), Profile(..), SyntaxForest(..)) +import GF.Parsing.FCFG.PInfo (FCFPInfo(..)) + +import qualified Data.Array as Array import Data.Map + -- convert parsed grammar to internal GFCC toGFCC :: Grammar -> GFCC @@ -31,29 +38,88 @@ toGFCC (Grm [ catfuns = fromAscList [(cat,[f | (f, (DTyp _ c _,_)) <- lfuns, c==cat]) | (cat,_) <- lcats] in Abstr aflags funs cats catfuns, - concretes = fromAscList (lmap mkCnc ccs) + concretes = fromAscList [(lang, toConcr ts) | App lang ts <- ccs] } where - mkCnc ( - App lang [ - App (CId "flags") fls, - App (CId "lin") ls, - App (CId "oper") ops, - App (CId "lincat") lincs, - App (CId "lindef") linds, - App (CId "printname") prns, - App (CId "param") params - ]) = (lang, - Concr { - cflags = fromAscList [(f,v) | App f [AStr v] <- fls], - lins = fromAscList [(f,toTerm v) | App f [v] <- ls], - opers = fromAscList [(f,toTerm v) | App f [v] <- ops], - lincats = fromAscList [(f,toTerm v) | App f [v] <- lincs], - lindefs = fromAscList [(f,toTerm v) | App f [v] <- linds], - printnames = fromAscList [(f,toTerm v) | App f [v] <- prns], - paramlincats = fromAscList [(f,toTerm v) | App f [v] <- params] - } - ) + +toConcr :: [RExp] -> Concr +toConcr = foldl add (Concr { + cflags = empty, + lins = empty, + opers = empty, + lincats = empty, + lindefs = empty, + printnames = empty, + paramlincats = empty, + parser = Nothing + }) + where + add :: Concr -> RExp -> Concr + add cnc (App (CId "flags") ts) = cnc { cflags = fromAscList [(f,v) | App f [AStr v] <- ts] } + add cnc (App (CId "lin") ts) = cnc { lins = mkTermMap ts } + add cnc (App (CId "oper") ts) = cnc { opers = mkTermMap ts } + add cnc (App (CId "lincat") ts) = cnc { lincats = mkTermMap ts } + add cnc (App (CId "lindef") ts) = cnc { lindefs = mkTermMap ts } + add cnc (App (CId "printname") ts) = cnc { printnames = mkTermMap ts } + add cnc (App (CId "param") ts) = cnc { paramlincats = mkTermMap ts } + add cnc (App (CId "parser") ts) = cnc { parser = Just (toPInfo ts) } + +toPInfo :: [RExp] -> FCFPInfo +toPInfo = foldl add (FCFPInfo { + allRules = error "FCFPInfo.allRules", + topdownRules = error "FCFPInfo.topdownRules", + epsilonRules = error "FCFPInfo.epsilonRules", + leftcornerCats = error "FCFPInfo.leftcornerCats", + leftcornerTokens = error "FCFPInfo.leftcornerTokens", + grammarCats = error "FCFPInfo.grammarCats", + grammarToks = error "FCFPInfo.grammarToks", + startupCats = error "FCFPInfo.startupCats"}) + where + add :: FCFPInfo -> RExp -> FCFPInfo + add p (App (CId f) ts) = + case f of + "rules" -> p { allRules = mkArray (lmap toFRule ts) } + "topdownrules" -> p { topdownRules = toAssoc expToInt (lmap expToInt) ts } + "epsilonrules" -> p { epsilonRules = lmap expToInt ts } + "lccats" -> p { leftcornerCats = toAssoc expToInt (lmap expToInt) ts } + "lctoks" -> p { leftcornerTokens = toAssoc expToStr (lmap expToInt) ts } + "cats" -> p { grammarCats = lmap expToInt ts } + "toks" -> p { grammarToks = lmap expToStr ts } + "startupcats" -> p { startupCats = fromList [(c, lmap expToInt cs) | App c cs <- ts] } + toFRule :: RExp -> FRule + toFRule (App (CId "rule") + [n, + App (CId "cats") (rt:at), + App (CId "R") ls]) = FRule name args res lins + where + name = toFName n + args = lmap expToInt at + res = expToInt rt + lins = mkArray [mkArray [toSymbol s | s <- l] | App (CId "S") l <- ls] + +toFName :: RExp -> FName +toFName (App (CId "_A") [x]) = Name (CId "_") [Unify [expToInt x]] +toFName (App f ts) = Name f (lmap toProfile ts) + where + toProfile :: RExp -> Profile (SyntaxForest CId) + toProfile AMet = Unify [] + toProfile (App (CId "_A") [t]) = Unify [expToInt t] + toProfile (App (CId "_U") ts) = Unify [expToInt t | App (CId "_A") [t] <- ts] + toProfile t = Constant (toSyntaxForest t) + + toSyntaxForest :: RExp -> SyntaxForest CId + toSyntaxForest AMet = FMeta + toSyntaxForest (App n ts) = FNode n [lmap toSyntaxForest ts] + toSyntaxForest (AStr s) = FString s + toSyntaxForest (AInt i) = FInt i + toSyntaxForest (AFlt f) = FFloat f + +toSymbol :: RExp -> FSymbol +toSymbol (App (CId "proj") [c,n,l]) = FSymCat (expToInt c) (expToInt l) (expToInt n) +toSymbol (AStr t) = FSymTok t + +toAssoc :: Ord a => (RExp -> a) -> ([RExp] -> b) -> [RExp] -> Assoc a b +toAssoc f g xs = listAssoc [(f k, g v) | App (CId "map") (k:v) <- xs] toType :: RExp -> Type toType e = case e of @@ -120,7 +186,7 @@ fromGFCC gfcc0 = Grm [ app "lindef" [App f [fromTerm v] | (f,v) <- toList (lindefs cnc)], app "printname" [App f [fromTerm v] | (f,v) <- toList (printnames cnc)], app "param" [App f [fromTerm v] | (f,v) <- toList (paramlincats cnc)] - ] + ] ++ maybe [] (\p -> [fromPInfo p]) (parser cnc) fromType :: Type -> RExp fromType e = case e of @@ -163,3 +229,74 @@ fromTerm e = case e of where app = App . CId str v = app "S" (lmap AStr v) + +-- ** Parsing info + +fromPInfo :: FCFPInfo -> RExp +fromPInfo p = app "parser" [ + app "rules" [fromFRule rule | rule <- Array.elems (allRules p)], + app "topdownrules" (fromAssoc intToExp (lmap intToExp) (topdownRules p)), + app "epsilonrules" (lmap intToExp (epsilonRules p)), + app "lccats" (fromAssoc intToExp (lmap intToExp) (leftcornerCats p)), + app "lctoks" (fromAssoc AStr (lmap intToExp) (leftcornerTokens p)), + app "cats" (lmap intToExp (grammarCats p)), + app "toks" (lmap AStr (grammarToks p)), + app "startupcats" [App f (lmap intToExp cs) | (f,cs) <- toList (startupCats p)] + ] + +fromAssoc :: Ord a => (a -> RExp) -> (b -> [RExp]) -> Assoc a b -> [RExp] +fromAssoc f g xs = [app "map" (f x:g y) | (x,y) <- aAssocs xs] + +fromFRule :: FRule -> RExp +fromFRule (FRule n args res lins) = + app "rule" [fromFName n, + app "cats" (intToExp res:lmap intToExp args), + app "R" [app "S" [fromSymbol s | s <- Array.elems l] | l <- Array.elems lins] + ] + +fromFName :: FName -> RExp +fromFName n = case n of + Name (CId "_") [p] -> fromProfile p + Name f ps -> App f (lmap fromProfile ps) + where + fromProfile :: Profile (SyntaxForest CId) -> RExp + fromProfile (Unify []) = AMet + fromProfile (Unify [x]) = daughter x + fromProfile (Unify args) = app "_U" (lmap daughter args) + fromProfile (Constant forest) = fromSyntaxForest forest + + daughter n = app "_A" [intToExp n] + + fromSyntaxForest :: SyntaxForest CId -> RExp + fromSyntaxForest FMeta = AMet + -- FIXME: is there always just one element here? + fromSyntaxForest (FNode n [args]) = App n (lmap fromSyntaxForest args) + fromSyntaxForest (FString s) = AStr s + fromSyntaxForest (FInt i) = AInt i + fromSyntaxForest (FFloat f) = AFlt f + +fromSymbol :: FSymbol -> RExp +fromSymbol (FSymCat c l n) = app "proj" [intToExp c, intToExp n, intToExp l] +fromSymbol (FSymTok t) = AStr t + +-- ** Utilities + +mkTermMap :: [RExp] -> Map CId Term +mkTermMap ts = fromAscList [(f,toTerm v) | App f [v] <- ts] + +app :: String -> [RExp] -> RExp +app = App . CId + +mkArray :: [a] -> Array.Array Int a +mkArray xs = Array.listArray (0, length xs - 1) xs + +expToInt :: Integral a => RExp -> a +expToInt (App (CId "neg") [AInt i]) = fromIntegral (negate i) +expToInt (AInt i) = fromIntegral i + +expToStr :: RExp -> String +expToStr (AStr s) = s + +intToExp :: Integral a => a -> RExp +intToExp x | x < 0 = App (CId "neg") [AInt (fromIntegral (negate x))] + | otherwise = AInt (fromIntegral x) diff --git a/src/GF/Parsing/FCFG/PInfo.hs b/src/GF/Parsing/FCFG/PInfo.hs index bf2859911..8b288f2f1 100644 --- a/src/GF/Parsing/FCFG/PInfo.hs +++ b/src/GF/Parsing/FCFG/PInfo.hs @@ -99,6 +99,9 @@ buildFCFPInfo (grammar,startup) = -- trace (unlines [prt (x,Set.toList set) | (x grammarcats = aElems topdownrules grammartoks = nubsort [t | (FRule _ _ _ lins) <- grammar, lin <- elems lins, FSymTok t <- elems lin] +fcfPInfoToFGrammar :: FCFPInfo -> FGrammar +fcfPInfoToFGrammar pinfo = (elems (allRules pinfo), startupCats pinfo) + ---------------------------------------------------------------------- -- pretty-printing of statistics