started extending GFCC API with parsing

This commit is contained in:
aarne
2007-09-19 16:48:13 +00:00
parent 6dcd33ec26
commit 7324597039
5 changed files with 351 additions and 3 deletions

View File

@@ -0,0 +1,171 @@
module GF.Canon.GFCC.FCFGParsing where
import GF.Canon.GFCC.DataGFCC
import GF.Canon.GFCC.AbsGFCC
import GF.Conversion.SimpleToFCFG (convertGrammar)
--import GF.System.Tracing
--import GF.Infra.Print
--import qualified GF.Grammar.PrGrammar as PrGrammar
--import GF.Data.Operations (Err(..))
--import qualified GF.Grammar.Grammar as Grammar
--import qualified GF.Grammar.Macros as Macros
--import qualified GF.Canon.AbsGFC as AbsGFC
--import qualified GF.Canon.GFCC.AbsGFCC as AbsGFCC
--import qualified GF.Infra.Ident as Ident
--import GF.CF.CFIdent (CFCat, cfCat2Ident, CFTok, wordsCFTok)
import GF.Data.SortedList
import GF.Data.Assoc
import GF.Formalism.Utilities --(forest2trees)
--import GF.Conversion.Types
import GF.Formalism.FCFG
--import qualified GF.Formalism.GCFG as G
--import qualified GF.Formalism.SimpleGFC as S
--import qualified GF.Formalism.MCFG as M
--import qualified GF.Formalism.CFG as C
--import qualified GF.Parsing.MCFG as PM
import qualified GF.Parsing.FCFG as PF
--import qualified GF.Parsing.CFG as PC
import GF.Canon.GFCC.ErrM
--convertGrammar :: Grammar -> [(Ident,FGrammar)]
--import qualified GF.Parsing.GFC as New
--checkErr $ New.parse algorithm strategy (pInfo sg) (absId sg) cat toks
-- algorithm "f"
-- strategy "bottomup"
type Token = String ----
type CFTok = String ----
type CFCat = CId ----
type Fun = CId ----
cfCat2Ident = id ----
wordsCFTok :: CFTok -> [String]
wordsCFTok = return ----
type FCFPInfo = PF.FCFPInfo FCat FName Token
-- main parsing function
parse ::
-- String -> -- ^ parsing algorithm (mcfg or cfg)
-- String -> -- ^ parsing strategy
FCFPInfo -> -- ^ compiled grammar (fcfg)
-- Ident.Ident -> -- ^ abstract module name
CFCat -> -- ^ starting category
[CFTok] -> -- ^ input tokens
Err [Exp] -- ^ resulting GF terms
parse pinfo startCat inString =
do let inTokens = inputMany (map wordsCFTok inString)
forests <- selectParser pinfo startCat inTokens
let filteredForests = forests >>= applyProfileToForest
trees = nubsort $ filteredForests >>= forest2trees
return $ map tree2term trees
-- parsing via FCFG
selectParser pinfo startCat inTokens
= do let startCats = filter isStart $ PF.grammarCats fcfpi
isStart cat = cat' == cfCat2Ident startCat
where CId x = fcat2cid cat
cat' = CId x
fcfpi = pinfo
fcfParser <- PF.parseFCF "bottomup"
let chart = fcfParser fcfpi startCats inTokens
(i,j) = inputBounds inTokens
finalEdges = [PF.makeFinalEdge cat i j | cat <- startCats]
return $ map cnv_forests $ chart2forests chart (const False) finalEdges
cnv_forests FMeta = FMeta
cnv_forests (FNode (Name (CId n) p) fss) = FNode (Name (CId n) (map cnv_profile p)) (map (map cnv_forests) fss)
cnv_forests (FString x) = FString x
cnv_forests (FInt x) = FInt x
cnv_forests (FFloat x) = FFloat x
cnv_profile (Unify x) = Unify x
cnv_profile (Constant x) = Constant (cnv_forests2 x)
cnv_forests2 FMeta = FMeta
cnv_forests2 (FNode (CId n) fss) = FNode (CId n) (map (map cnv_forests2) fss)
cnv_forests2 (FString x) = FString x
cnv_forests2 (FInt x) = FInt x
cnv_forests2 (FFloat x) = FFloat x
----------------------------------------------------------------------
-- parse trees to GFCC terms
tree2term :: SyntaxTree Fun -> Exp
tree2term (TNode f ts) = Tr (AC (CId f)) (map tree2term ts)
{- ----
tree2term (TString s) = Macros.string2term s
tree2term (TInt n) = Macros.int2term n
tree2term (TFloat f) = Macros.float2term f
tree2term (TMeta) = Macros.mkMeta 0
-}
----------------------------------------------------------------------
-- conversion and unification of forests
-- simplest implementation
applyProfileToForest :: SyntaxForest Name -> [SyntaxForest Fun]
applyProfileToForest (FNode name@(Name fun profile) children)
| isCoercion name = concat chForests
| otherwise = [ FNode fun chForests | not (null chForests) ]
where chForests = concat [ applyProfileM unifyManyForests profile forests |
forests0 <- children,
forests <- mapM applyProfileToForest forests0 ]
applyProfileToForest (FString s) = [FString s]
applyProfileToForest (FInt n) = [FInt n]
applyProfileToForest (FFloat f) = [FFloat f]
applyProfileToForest (FMeta) = [FMeta]
--------------------- From parsing types ------------------------------
-- * fast nonerasing MCFG
type FIndex = Int
type FPath = [FIndex]
type FName = NameProfile CId
type FGrammar = FCFGrammar FCat FName Token
type FRule = FCFRule FCat FName Token
data FCat = FCat {-# UNPACK #-} !Int CId [FPath] [(FPath,FIndex)]
initialFCat :: CId -> FCat
initialFCat cat = FCat 0 cat [] []
fcatString = FCat (-1) (CId "String") [[0]] []
fcatInt = FCat (-2) (CId "Int") [[0]] []
fcatFloat = FCat (-3) (CId "Float") [[0]] []
fcat2cid :: FCat -> CId
fcat2cid (FCat _ c _ _) = c
instance Eq FCat where
(FCat id1 _ _ _) == (FCat id2 _ _ _) = id1 == id2
instance Ord FCat where
compare (FCat id1 _ _ _) (FCat id2 _ _ _) = compare id1 id2
---
isCoercion :: Name -> Bool
isCoercion (Name fun [Unify [0]]) = False -- isWildIdent fun
isCoercion _ = False
type Name = NameProfile Fun

View File

@@ -0,0 +1,115 @@
----------------------------------------------------------------------
-- |
-- Module : GFCCAPI
-- Maintainer : Aarne Ranta
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date:
-- > CVS $Author:
-- > CVS $Revision:
--
-- Reduced Application Programmer's Interface to GF, meant for
-- embedded GF systems. AR 19/9/2007
-----------------------------------------------------------------------------
module GF.Canon.GFCC.GFCCAPI where
import GF.Canon.GFCC.DataGFCC
--import GF.Canon.GFCC.GenGFCC
import GF.Canon.GFCC.AbsGFCC
import GF.Canon.GFCC.ParGFCC
import GF.Canon.GFCC.PrintGFCC
import GF.Canon.GFCC.ErrM
--import GF.Data.Operations
--import GF.Infra.UseIO
import qualified Data.Map as Map
import System.Random (newStdGen)
import System.Directory (doesFileExist)
import System
-- This API is meant to be used when embedding GF grammars in Haskell
-- programs. The embedded system is supposed to use the
-- .gfcm grammar format, which is first produced by the gf program.
---------------------------------------------------
-- Interface
---------------------------------------------------
type MultiGrammar = GFCC
type Language = String
type Category = String
type Tree = Exp
file2grammar :: FilePath -> IO MultiGrammar
linearize :: MultiGrammar -> Language -> Tree -> String
parse :: MultiGrammar -> Language -> Category -> String -> [Tree]
linearizeAll :: MultiGrammar -> Tree -> [String]
linearizeAllLang :: MultiGrammar -> Tree -> [(Language,String)]
--parseAll :: MultiGrammar -> Category -> String -> [[Tree]]
--parseAllLang :: MultiGrammar -> Category -> String -> [(Language,[Tree])]
readTree :: MultiGrammar -> String -> Tree
showTree :: Tree -> String
languages :: MultiGrammar -> [Language]
categories :: MultiGrammar -> [Category]
startCat :: MultiGrammar -> Category
---------------------------------------------------
-- Implementation
---------------------------------------------------
file2grammar f =
readFileIf f >>= err (error "no parse") (return . mkGFCC) . pGrammar . myLexer
linearize mgr lang = GF.Canon.GFCC.DataGFCC.linearize mgr (CId lang)
parse mgr lang cat s = []
{-
map tree2exp .
errVal [] .
parseString (stateOptions sgr) sgr cfcat
where
sgr = stateGrammarOfLang mgr (zIdent lang)
cfcat = string2CFCat abs cat
abs = maybe (error "no abstract syntax") prIdent $ abstract mgr
-}
linearizeAll mgr = map snd . linearizeAllLang mgr
linearizeAllLang mgr t = [(lang,linearThis mgr lang t) | lang <- languages mgr]
{-
parseAll mgr cat = map snd . parseAllLang mgr cat
parseAllLang mgr cat s =
[(lang,ts) | lang <- languages mgr, let ts = parse mgr lang cat s, not (null ts)]
-}
readTree _ = err (const exp0) id . (pExp . myLexer)
showTree t = printTree t
languages mgr = [l | CId l <- cncnames mgr]
categories mgr = [c | CId c <- Map.keys (cats (abstract mgr))]
startCat mgr = "S" ----
------------ for internal use only
linearThis = GF.Canon.GFCC.GFCCAPI.linearize
err f g ex = case ex of
Ok x -> g x
Bad s -> f s
readFileIf f = do
b <- doesFileExist f
if b then readFile f
else putStrLn ("file " ++ f ++ " not found") >> return ""

View File

@@ -448,14 +448,14 @@ happyReduction_42 happy_x_2
happy_x_1
= case happyOut24 happy_x_2 of { happy_var_2 ->
happyIn36
(V happy_var_2
(V (fromInteger happy_var_2) --H
)}
happyReduce_43 = happySpecReduce_1 13# happyReduction_43
happyReduction_43 happy_x_1
= case happyOut24 happy_x_1 of { happy_var_1 ->
happyIn36
(C happy_var_1
(C (fromInteger happy_var_1) --H
)}
happyReduce_44 = happySpecReduce_1 13# happyReduction_44

View File

@@ -0,0 +1,62 @@
module Main where
import GF.Canon.GFCC.GFCCAPI
import qualified GF.Canon.GFCC.GenGFCC as G ---
import GF.Canon.GFCC.AbsGFCC (CId(CId)) ---
import System.Random (newStdGen)
import System (getArgs)
-- Simple translation application built on GFCC. AR 7/9/2006 -- 19/9/2007
main :: IO ()
main = do
file:_ <- getArgs
grammar <- file2grammar file
putStrLn $ "languages: " ++ unwords (languages grammar)
putStrLn $ "categories: " ++ unwords (categories grammar)
loop grammar
loop :: MultiGrammar -> IO ()
loop grammar = do
s <- getLine
if s == "quit" then return () else do
treat grammar s
loop grammar
treat :: MultiGrammar -> String -> IO ()
treat grammar s = case words s of
"gt":cat:n:_ -> do
mapM_ prlinonly $ take (read n) $ G.generate grammar (CId cat)
"gtt":cat:n:_ -> do
mapM_ prlin $ take (read n) $ G.generate grammar (CId cat)
"gr":cat:n:_ -> do
gen <- newStdGen
mapM_ prlinonly $ take (read n) $ G.generateRandom gen grammar (CId cat)
"grt":cat:n:_ -> do
gen <- newStdGen
mapM_ prlin $ take (read n) $ G.generateRandom gen grammar (CId cat)
"p":lang:cat:ws -> do
let ts = parse grammar lang cat $ unwords ws
mapM_ (putStrLn . showTree) ts
"search":cat:n:ws -> do
case G.parse (read n) grammar (CId cat) ws of
t:_ -> prlin t
_ -> putStrLn "no parse found"
_ -> lins $ readTree grammar s
where
langs = languages grammar
lins t = mapM_ (lint t) $ langs
lint t lang = do
---- putStrLn $ showTree $ linExp grammar lang t
lin t lang
lin t lang = do
putStrLn $ linearize grammar lang t
prlins t = do
putStrLn $ showTree t
lins t
prlin t = do
putStrLn $ showTree t
prlinonly t
prlinonly t = mapM_ (lin t) $ langs

View File

@@ -192,7 +192,7 @@ tools/$(GF_DOC_EXE): tools/GFDoc.hs
$(GHMAKE) $(GHCOPTFLAGS) -o $@ $^
gfcc:
$(GHMAKE) $(GHCOPTFLAGS) -o gfcc GF/Canon/GFCC/RunGFCC.hs
$(GHMAKE) $(GHCOPTFLAGS) -o gfcc GF/Canon/GFCC/Shell.hs
strip gfcc
mv gfcc ../bin/