From 732459703946775cab19b7fac30fa39fb43ecc0b Mon Sep 17 00:00:00 2001 From: aarne Date: Wed, 19 Sep 2007 16:48:13 +0000 Subject: [PATCH] started extending GFCC API with parsing --- src/GF/Canon/GFCC/FCFGParsing.hs | 171 +++++++++++++++++++++++++++++++ src/GF/Canon/GFCC/GFCCAPI.hs | 115 +++++++++++++++++++++ src/GF/Canon/GFCC/ParGFCC.hs | 4 +- src/GF/Canon/GFCC/Shell.hs | 62 +++++++++++ src/Makefile | 2 +- 5 files changed, 351 insertions(+), 3 deletions(-) create mode 100644 src/GF/Canon/GFCC/FCFGParsing.hs create mode 100644 src/GF/Canon/GFCC/GFCCAPI.hs create mode 100644 src/GF/Canon/GFCC/Shell.hs diff --git a/src/GF/Canon/GFCC/FCFGParsing.hs b/src/GF/Canon/GFCC/FCFGParsing.hs new file mode 100644 index 000000000..f9a838417 --- /dev/null +++ b/src/GF/Canon/GFCC/FCFGParsing.hs @@ -0,0 +1,171 @@ +module GF.Canon.GFCC.FCFGParsing where + +import GF.Canon.GFCC.DataGFCC +import GF.Canon.GFCC.AbsGFCC +import GF.Conversion.SimpleToFCFG (convertGrammar) + +--import GF.System.Tracing +--import GF.Infra.Print +--import qualified GF.Grammar.PrGrammar as PrGrammar + +--import GF.Data.Operations (Err(..)) + +--import qualified GF.Grammar.Grammar as Grammar +--import qualified GF.Grammar.Macros as Macros +--import qualified GF.Canon.AbsGFC as AbsGFC +--import qualified GF.Canon.GFCC.AbsGFCC as AbsGFCC +--import qualified GF.Infra.Ident as Ident +--import GF.CF.CFIdent (CFCat, cfCat2Ident, CFTok, wordsCFTok) + +import GF.Data.SortedList +import GF.Data.Assoc +import GF.Formalism.Utilities --(forest2trees) + +--import GF.Conversion.Types + +import GF.Formalism.FCFG +--import qualified GF.Formalism.GCFG as G +--import qualified GF.Formalism.SimpleGFC as S +--import qualified GF.Formalism.MCFG as M +--import qualified GF.Formalism.CFG as C +--import qualified GF.Parsing.MCFG as PM +import qualified GF.Parsing.FCFG as PF +--import qualified GF.Parsing.CFG as PC +import GF.Canon.GFCC.ErrM + + +--convertGrammar :: Grammar -> [(Ident,FGrammar)] + +--import qualified GF.Parsing.GFC as New +--checkErr $ New.parse algorithm strategy (pInfo sg) (absId sg) cat toks +-- algorithm "f" +-- strategy "bottomup" + +type Token = String ---- +type CFTok = String ---- +type CFCat = CId ---- +type Fun = CId ---- + +cfCat2Ident = id ---- + +wordsCFTok :: CFTok -> [String] +wordsCFTok = return ---- + + +type FCFPInfo = PF.FCFPInfo FCat FName Token + +-- main parsing function + +parse :: +-- String -> -- ^ parsing algorithm (mcfg or cfg) +-- String -> -- ^ parsing strategy + FCFPInfo -> -- ^ compiled grammar (fcfg) +-- Ident.Ident -> -- ^ abstract module name + CFCat -> -- ^ starting category + [CFTok] -> -- ^ input tokens + Err [Exp] -- ^ resulting GF terms + +parse pinfo startCat inString = + + do let inTokens = inputMany (map wordsCFTok inString) + forests <- selectParser pinfo startCat inTokens + let filteredForests = forests >>= applyProfileToForest + trees = nubsort $ filteredForests >>= forest2trees + + return $ map tree2term trees + + +-- parsing via FCFG +selectParser pinfo startCat inTokens + = do let startCats = filter isStart $ PF.grammarCats fcfpi + isStart cat = cat' == cfCat2Ident startCat + where CId x = fcat2cid cat + cat' = CId x + fcfpi = pinfo + fcfParser <- PF.parseFCF "bottomup" + let chart = fcfParser fcfpi startCats inTokens + (i,j) = inputBounds inTokens + finalEdges = [PF.makeFinalEdge cat i j | cat <- startCats] + return $ map cnv_forests $ chart2forests chart (const False) finalEdges + +cnv_forests FMeta = FMeta +cnv_forests (FNode (Name (CId n) p) fss) = FNode (Name (CId n) (map cnv_profile p)) (map (map cnv_forests) fss) +cnv_forests (FString x) = FString x +cnv_forests (FInt x) = FInt x +cnv_forests (FFloat x) = FFloat x + +cnv_profile (Unify x) = Unify x +cnv_profile (Constant x) = Constant (cnv_forests2 x) + +cnv_forests2 FMeta = FMeta +cnv_forests2 (FNode (CId n) fss) = FNode (CId n) (map (map cnv_forests2) fss) +cnv_forests2 (FString x) = FString x +cnv_forests2 (FInt x) = FInt x +cnv_forests2 (FFloat x) = FFloat x + +---------------------------------------------------------------------- +-- parse trees to GFCC terms + +tree2term :: SyntaxTree Fun -> Exp +tree2term (TNode f ts) = Tr (AC (CId f)) (map tree2term ts) +{- ---- +tree2term (TString s) = Macros.string2term s +tree2term (TInt n) = Macros.int2term n +tree2term (TFloat f) = Macros.float2term f +tree2term (TMeta) = Macros.mkMeta 0 +-} + + +---------------------------------------------------------------------- +-- conversion and unification of forests + +-- simplest implementation +applyProfileToForest :: SyntaxForest Name -> [SyntaxForest Fun] +applyProfileToForest (FNode name@(Name fun profile) children) + | isCoercion name = concat chForests + | otherwise = [ FNode fun chForests | not (null chForests) ] + where chForests = concat [ applyProfileM unifyManyForests profile forests | + forests0 <- children, + forests <- mapM applyProfileToForest forests0 ] +applyProfileToForest (FString s) = [FString s] +applyProfileToForest (FInt n) = [FInt n] +applyProfileToForest (FFloat f) = [FFloat f] +applyProfileToForest (FMeta) = [FMeta] + + +--------------------- From parsing types ------------------------------ + +-- * fast nonerasing MCFG + +type FIndex = Int +type FPath = [FIndex] +type FName = NameProfile CId +type FGrammar = FCFGrammar FCat FName Token +type FRule = FCFRule FCat FName Token +data FCat = FCat {-# UNPACK #-} !Int CId [FPath] [(FPath,FIndex)] + +initialFCat :: CId -> FCat +initialFCat cat = FCat 0 cat [] [] + +fcatString = FCat (-1) (CId "String") [[0]] [] +fcatInt = FCat (-2) (CId "Int") [[0]] [] +fcatFloat = FCat (-3) (CId "Float") [[0]] [] + +fcat2cid :: FCat -> CId +fcat2cid (FCat _ c _ _) = c + +instance Eq FCat where + (FCat id1 _ _ _) == (FCat id2 _ _ _) = id1 == id2 + +instance Ord FCat where + compare (FCat id1 _ _ _) (FCat id2 _ _ _) = compare id1 id2 + + + +--- + +isCoercion :: Name -> Bool +isCoercion (Name fun [Unify [0]]) = False -- isWildIdent fun +isCoercion _ = False + +type Name = NameProfile Fun diff --git a/src/GF/Canon/GFCC/GFCCAPI.hs b/src/GF/Canon/GFCC/GFCCAPI.hs new file mode 100644 index 000000000..e815697d7 --- /dev/null +++ b/src/GF/Canon/GFCC/GFCCAPI.hs @@ -0,0 +1,115 @@ +---------------------------------------------------------------------- +-- | +-- Module : GFCCAPI +-- Maintainer : Aarne Ranta +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: +-- > CVS $Author: +-- > CVS $Revision: +-- +-- Reduced Application Programmer's Interface to GF, meant for +-- embedded GF systems. AR 19/9/2007 +----------------------------------------------------------------------------- + +module GF.Canon.GFCC.GFCCAPI where + +import GF.Canon.GFCC.DataGFCC +--import GF.Canon.GFCC.GenGFCC +import GF.Canon.GFCC.AbsGFCC +import GF.Canon.GFCC.ParGFCC +import GF.Canon.GFCC.PrintGFCC +import GF.Canon.GFCC.ErrM +--import GF.Data.Operations +--import GF.Infra.UseIO +import qualified Data.Map as Map +import System.Random (newStdGen) +import System.Directory (doesFileExist) +import System + +-- This API is meant to be used when embedding GF grammars in Haskell +-- programs. The embedded system is supposed to use the +-- .gfcm grammar format, which is first produced by the gf program. + +--------------------------------------------------- +-- Interface +--------------------------------------------------- + +type MultiGrammar = GFCC +type Language = String +type Category = String +type Tree = Exp + +file2grammar :: FilePath -> IO MultiGrammar + +linearize :: MultiGrammar -> Language -> Tree -> String +parse :: MultiGrammar -> Language -> Category -> String -> [Tree] + +linearizeAll :: MultiGrammar -> Tree -> [String] +linearizeAllLang :: MultiGrammar -> Tree -> [(Language,String)] + +--parseAll :: MultiGrammar -> Category -> String -> [[Tree]] +--parseAllLang :: MultiGrammar -> Category -> String -> [(Language,[Tree])] + +readTree :: MultiGrammar -> String -> Tree +showTree :: Tree -> String + +languages :: MultiGrammar -> [Language] +categories :: MultiGrammar -> [Category] + +startCat :: MultiGrammar -> Category + +--------------------------------------------------- +-- Implementation +--------------------------------------------------- + +file2grammar f = + readFileIf f >>= err (error "no parse") (return . mkGFCC) . pGrammar . myLexer + +linearize mgr lang = GF.Canon.GFCC.DataGFCC.linearize mgr (CId lang) + + +parse mgr lang cat s = [] +{- + map tree2exp . + errVal [] . + parseString (stateOptions sgr) sgr cfcat + where + sgr = stateGrammarOfLang mgr (zIdent lang) + cfcat = string2CFCat abs cat + abs = maybe (error "no abstract syntax") prIdent $ abstract mgr +-} + +linearizeAll mgr = map snd . linearizeAllLang mgr +linearizeAllLang mgr t = [(lang,linearThis mgr lang t) | lang <- languages mgr] + +{- +parseAll mgr cat = map snd . parseAllLang mgr cat + +parseAllLang mgr cat s = + [(lang,ts) | lang <- languages mgr, let ts = parse mgr lang cat s, not (null ts)] +-} + +readTree _ = err (const exp0) id . (pExp . myLexer) + +showTree t = printTree t + +languages mgr = [l | CId l <- cncnames mgr] + +categories mgr = [c | CId c <- Map.keys (cats (abstract mgr))] + +startCat mgr = "S" ---- + +------------ for internal use only + +linearThis = GF.Canon.GFCC.GFCCAPI.linearize + +err f g ex = case ex of + Ok x -> g x + Bad s -> f s + +readFileIf f = do + b <- doesFileExist f + if b then readFile f + else putStrLn ("file " ++ f ++ " not found") >> return "" diff --git a/src/GF/Canon/GFCC/ParGFCC.hs b/src/GF/Canon/GFCC/ParGFCC.hs index 22813a47b..2d208c20d 100644 --- a/src/GF/Canon/GFCC/ParGFCC.hs +++ b/src/GF/Canon/GFCC/ParGFCC.hs @@ -448,14 +448,14 @@ happyReduction_42 happy_x_2 happy_x_1 = case happyOut24 happy_x_2 of { happy_var_2 -> happyIn36 - (V happy_var_2 + (V (fromInteger happy_var_2) --H )} happyReduce_43 = happySpecReduce_1 13# happyReduction_43 happyReduction_43 happy_x_1 = case happyOut24 happy_x_1 of { happy_var_1 -> happyIn36 - (C happy_var_1 + (C (fromInteger happy_var_1) --H )} happyReduce_44 = happySpecReduce_1 13# happyReduction_44 diff --git a/src/GF/Canon/GFCC/Shell.hs b/src/GF/Canon/GFCC/Shell.hs new file mode 100644 index 000000000..bc33e7949 --- /dev/null +++ b/src/GF/Canon/GFCC/Shell.hs @@ -0,0 +1,62 @@ +module Main where + +import GF.Canon.GFCC.GFCCAPI +import qualified GF.Canon.GFCC.GenGFCC as G --- +import GF.Canon.GFCC.AbsGFCC (CId(CId)) --- +import System.Random (newStdGen) +import System (getArgs) + + +-- Simple translation application built on GFCC. AR 7/9/2006 -- 19/9/2007 + +main :: IO () +main = do + file:_ <- getArgs + grammar <- file2grammar file + putStrLn $ "languages: " ++ unwords (languages grammar) + putStrLn $ "categories: " ++ unwords (categories grammar) + loop grammar + +loop :: MultiGrammar -> IO () +loop grammar = do + s <- getLine + if s == "quit" then return () else do + treat grammar s + loop grammar + +treat :: MultiGrammar -> String -> IO () +treat grammar s = case words s of + "gt":cat:n:_ -> do + mapM_ prlinonly $ take (read n) $ G.generate grammar (CId cat) + "gtt":cat:n:_ -> do + mapM_ prlin $ take (read n) $ G.generate grammar (CId cat) + "gr":cat:n:_ -> do + gen <- newStdGen + mapM_ prlinonly $ take (read n) $ G.generateRandom gen grammar (CId cat) + "grt":cat:n:_ -> do + gen <- newStdGen + mapM_ prlin $ take (read n) $ G.generateRandom gen grammar (CId cat) + "p":lang:cat:ws -> do + let ts = parse grammar lang cat $ unwords ws + mapM_ (putStrLn . showTree) ts + "search":cat:n:ws -> do + case G.parse (read n) grammar (CId cat) ws of + t:_ -> prlin t + _ -> putStrLn "no parse found" + _ -> lins $ readTree grammar s + where + langs = languages grammar + lins t = mapM_ (lint t) $ langs + lint t lang = do +---- putStrLn $ showTree $ linExp grammar lang t + lin t lang + lin t lang = do + putStrLn $ linearize grammar lang t + prlins t = do + putStrLn $ showTree t + lins t + prlin t = do + putStrLn $ showTree t + prlinonly t + prlinonly t = mapM_ (lin t) $ langs + diff --git a/src/Makefile b/src/Makefile index aae174619..35bd35fae 100644 --- a/src/Makefile +++ b/src/Makefile @@ -192,7 +192,7 @@ tools/$(GF_DOC_EXE): tools/GFDoc.hs $(GHMAKE) $(GHCOPTFLAGS) -o $@ $^ gfcc: - $(GHMAKE) $(GHCOPTFLAGS) -o gfcc GF/Canon/GFCC/RunGFCC.hs + $(GHMAKE) $(GHCOPTFLAGS) -o gfcc GF/Canon/GFCC/Shell.hs strip gfcc mv gfcc ../bin/