mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-23 19:42:50 -06:00
started extending GFCC API with parsing
This commit is contained in:
171
src/GF/Canon/GFCC/FCFGParsing.hs
Normal file
171
src/GF/Canon/GFCC/FCFGParsing.hs
Normal file
@@ -0,0 +1,171 @@
|
|||||||
|
module GF.Canon.GFCC.FCFGParsing where
|
||||||
|
|
||||||
|
import GF.Canon.GFCC.DataGFCC
|
||||||
|
import GF.Canon.GFCC.AbsGFCC
|
||||||
|
import GF.Conversion.SimpleToFCFG (convertGrammar)
|
||||||
|
|
||||||
|
--import GF.System.Tracing
|
||||||
|
--import GF.Infra.Print
|
||||||
|
--import qualified GF.Grammar.PrGrammar as PrGrammar
|
||||||
|
|
||||||
|
--import GF.Data.Operations (Err(..))
|
||||||
|
|
||||||
|
--import qualified GF.Grammar.Grammar as Grammar
|
||||||
|
--import qualified GF.Grammar.Macros as Macros
|
||||||
|
--import qualified GF.Canon.AbsGFC as AbsGFC
|
||||||
|
--import qualified GF.Canon.GFCC.AbsGFCC as AbsGFCC
|
||||||
|
--import qualified GF.Infra.Ident as Ident
|
||||||
|
--import GF.CF.CFIdent (CFCat, cfCat2Ident, CFTok, wordsCFTok)
|
||||||
|
|
||||||
|
import GF.Data.SortedList
|
||||||
|
import GF.Data.Assoc
|
||||||
|
import GF.Formalism.Utilities --(forest2trees)
|
||||||
|
|
||||||
|
--import GF.Conversion.Types
|
||||||
|
|
||||||
|
import GF.Formalism.FCFG
|
||||||
|
--import qualified GF.Formalism.GCFG as G
|
||||||
|
--import qualified GF.Formalism.SimpleGFC as S
|
||||||
|
--import qualified GF.Formalism.MCFG as M
|
||||||
|
--import qualified GF.Formalism.CFG as C
|
||||||
|
--import qualified GF.Parsing.MCFG as PM
|
||||||
|
import qualified GF.Parsing.FCFG as PF
|
||||||
|
--import qualified GF.Parsing.CFG as PC
|
||||||
|
import GF.Canon.GFCC.ErrM
|
||||||
|
|
||||||
|
|
||||||
|
--convertGrammar :: Grammar -> [(Ident,FGrammar)]
|
||||||
|
|
||||||
|
--import qualified GF.Parsing.GFC as New
|
||||||
|
--checkErr $ New.parse algorithm strategy (pInfo sg) (absId sg) cat toks
|
||||||
|
-- algorithm "f"
|
||||||
|
-- strategy "bottomup"
|
||||||
|
|
||||||
|
type Token = String ----
|
||||||
|
type CFTok = String ----
|
||||||
|
type CFCat = CId ----
|
||||||
|
type Fun = CId ----
|
||||||
|
|
||||||
|
cfCat2Ident = id ----
|
||||||
|
|
||||||
|
wordsCFTok :: CFTok -> [String]
|
||||||
|
wordsCFTok = return ----
|
||||||
|
|
||||||
|
|
||||||
|
type FCFPInfo = PF.FCFPInfo FCat FName Token
|
||||||
|
|
||||||
|
-- main parsing function
|
||||||
|
|
||||||
|
parse ::
|
||||||
|
-- String -> -- ^ parsing algorithm (mcfg or cfg)
|
||||||
|
-- String -> -- ^ parsing strategy
|
||||||
|
FCFPInfo -> -- ^ compiled grammar (fcfg)
|
||||||
|
-- Ident.Ident -> -- ^ abstract module name
|
||||||
|
CFCat -> -- ^ starting category
|
||||||
|
[CFTok] -> -- ^ input tokens
|
||||||
|
Err [Exp] -- ^ resulting GF terms
|
||||||
|
|
||||||
|
parse pinfo startCat inString =
|
||||||
|
|
||||||
|
do let inTokens = inputMany (map wordsCFTok inString)
|
||||||
|
forests <- selectParser pinfo startCat inTokens
|
||||||
|
let filteredForests = forests >>= applyProfileToForest
|
||||||
|
trees = nubsort $ filteredForests >>= forest2trees
|
||||||
|
|
||||||
|
return $ map tree2term trees
|
||||||
|
|
||||||
|
|
||||||
|
-- parsing via FCFG
|
||||||
|
selectParser pinfo startCat inTokens
|
||||||
|
= do let startCats = filter isStart $ PF.grammarCats fcfpi
|
||||||
|
isStart cat = cat' == cfCat2Ident startCat
|
||||||
|
where CId x = fcat2cid cat
|
||||||
|
cat' = CId x
|
||||||
|
fcfpi = pinfo
|
||||||
|
fcfParser <- PF.parseFCF "bottomup"
|
||||||
|
let chart = fcfParser fcfpi startCats inTokens
|
||||||
|
(i,j) = inputBounds inTokens
|
||||||
|
finalEdges = [PF.makeFinalEdge cat i j | cat <- startCats]
|
||||||
|
return $ map cnv_forests $ chart2forests chart (const False) finalEdges
|
||||||
|
|
||||||
|
cnv_forests FMeta = FMeta
|
||||||
|
cnv_forests (FNode (Name (CId n) p) fss) = FNode (Name (CId n) (map cnv_profile p)) (map (map cnv_forests) fss)
|
||||||
|
cnv_forests (FString x) = FString x
|
||||||
|
cnv_forests (FInt x) = FInt x
|
||||||
|
cnv_forests (FFloat x) = FFloat x
|
||||||
|
|
||||||
|
cnv_profile (Unify x) = Unify x
|
||||||
|
cnv_profile (Constant x) = Constant (cnv_forests2 x)
|
||||||
|
|
||||||
|
cnv_forests2 FMeta = FMeta
|
||||||
|
cnv_forests2 (FNode (CId n) fss) = FNode (CId n) (map (map cnv_forests2) fss)
|
||||||
|
cnv_forests2 (FString x) = FString x
|
||||||
|
cnv_forests2 (FInt x) = FInt x
|
||||||
|
cnv_forests2 (FFloat x) = FFloat x
|
||||||
|
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
-- parse trees to GFCC terms
|
||||||
|
|
||||||
|
tree2term :: SyntaxTree Fun -> Exp
|
||||||
|
tree2term (TNode f ts) = Tr (AC (CId f)) (map tree2term ts)
|
||||||
|
{- ----
|
||||||
|
tree2term (TString s) = Macros.string2term s
|
||||||
|
tree2term (TInt n) = Macros.int2term n
|
||||||
|
tree2term (TFloat f) = Macros.float2term f
|
||||||
|
tree2term (TMeta) = Macros.mkMeta 0
|
||||||
|
-}
|
||||||
|
|
||||||
|
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
-- conversion and unification of forests
|
||||||
|
|
||||||
|
-- simplest implementation
|
||||||
|
applyProfileToForest :: SyntaxForest Name -> [SyntaxForest Fun]
|
||||||
|
applyProfileToForest (FNode name@(Name fun profile) children)
|
||||||
|
| isCoercion name = concat chForests
|
||||||
|
| otherwise = [ FNode fun chForests | not (null chForests) ]
|
||||||
|
where chForests = concat [ applyProfileM unifyManyForests profile forests |
|
||||||
|
forests0 <- children,
|
||||||
|
forests <- mapM applyProfileToForest forests0 ]
|
||||||
|
applyProfileToForest (FString s) = [FString s]
|
||||||
|
applyProfileToForest (FInt n) = [FInt n]
|
||||||
|
applyProfileToForest (FFloat f) = [FFloat f]
|
||||||
|
applyProfileToForest (FMeta) = [FMeta]
|
||||||
|
|
||||||
|
|
||||||
|
--------------------- From parsing types ------------------------------
|
||||||
|
|
||||||
|
-- * fast nonerasing MCFG
|
||||||
|
|
||||||
|
type FIndex = Int
|
||||||
|
type FPath = [FIndex]
|
||||||
|
type FName = NameProfile CId
|
||||||
|
type FGrammar = FCFGrammar FCat FName Token
|
||||||
|
type FRule = FCFRule FCat FName Token
|
||||||
|
data FCat = FCat {-# UNPACK #-} !Int CId [FPath] [(FPath,FIndex)]
|
||||||
|
|
||||||
|
initialFCat :: CId -> FCat
|
||||||
|
initialFCat cat = FCat 0 cat [] []
|
||||||
|
|
||||||
|
fcatString = FCat (-1) (CId "String") [[0]] []
|
||||||
|
fcatInt = FCat (-2) (CId "Int") [[0]] []
|
||||||
|
fcatFloat = FCat (-3) (CId "Float") [[0]] []
|
||||||
|
|
||||||
|
fcat2cid :: FCat -> CId
|
||||||
|
fcat2cid (FCat _ c _ _) = c
|
||||||
|
|
||||||
|
instance Eq FCat where
|
||||||
|
(FCat id1 _ _ _) == (FCat id2 _ _ _) = id1 == id2
|
||||||
|
|
||||||
|
instance Ord FCat where
|
||||||
|
compare (FCat id1 _ _ _) (FCat id2 _ _ _) = compare id1 id2
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
isCoercion :: Name -> Bool
|
||||||
|
isCoercion (Name fun [Unify [0]]) = False -- isWildIdent fun
|
||||||
|
isCoercion _ = False
|
||||||
|
|
||||||
|
type Name = NameProfile Fun
|
||||||
115
src/GF/Canon/GFCC/GFCCAPI.hs
Normal file
115
src/GF/Canon/GFCC/GFCCAPI.hs
Normal file
@@ -0,0 +1,115 @@
|
|||||||
|
----------------------------------------------------------------------
|
||||||
|
-- |
|
||||||
|
-- Module : GFCCAPI
|
||||||
|
-- Maintainer : Aarne Ranta
|
||||||
|
-- Stability : (stable)
|
||||||
|
-- Portability : (portable)
|
||||||
|
--
|
||||||
|
-- > CVS $Date:
|
||||||
|
-- > CVS $Author:
|
||||||
|
-- > CVS $Revision:
|
||||||
|
--
|
||||||
|
-- Reduced Application Programmer's Interface to GF, meant for
|
||||||
|
-- embedded GF systems. AR 19/9/2007
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
module GF.Canon.GFCC.GFCCAPI where
|
||||||
|
|
||||||
|
import GF.Canon.GFCC.DataGFCC
|
||||||
|
--import GF.Canon.GFCC.GenGFCC
|
||||||
|
import GF.Canon.GFCC.AbsGFCC
|
||||||
|
import GF.Canon.GFCC.ParGFCC
|
||||||
|
import GF.Canon.GFCC.PrintGFCC
|
||||||
|
import GF.Canon.GFCC.ErrM
|
||||||
|
--import GF.Data.Operations
|
||||||
|
--import GF.Infra.UseIO
|
||||||
|
import qualified Data.Map as Map
|
||||||
|
import System.Random (newStdGen)
|
||||||
|
import System.Directory (doesFileExist)
|
||||||
|
import System
|
||||||
|
|
||||||
|
-- This API is meant to be used when embedding GF grammars in Haskell
|
||||||
|
-- programs. The embedded system is supposed to use the
|
||||||
|
-- .gfcm grammar format, which is first produced by the gf program.
|
||||||
|
|
||||||
|
---------------------------------------------------
|
||||||
|
-- Interface
|
||||||
|
---------------------------------------------------
|
||||||
|
|
||||||
|
type MultiGrammar = GFCC
|
||||||
|
type Language = String
|
||||||
|
type Category = String
|
||||||
|
type Tree = Exp
|
||||||
|
|
||||||
|
file2grammar :: FilePath -> IO MultiGrammar
|
||||||
|
|
||||||
|
linearize :: MultiGrammar -> Language -> Tree -> String
|
||||||
|
parse :: MultiGrammar -> Language -> Category -> String -> [Tree]
|
||||||
|
|
||||||
|
linearizeAll :: MultiGrammar -> Tree -> [String]
|
||||||
|
linearizeAllLang :: MultiGrammar -> Tree -> [(Language,String)]
|
||||||
|
|
||||||
|
--parseAll :: MultiGrammar -> Category -> String -> [[Tree]]
|
||||||
|
--parseAllLang :: MultiGrammar -> Category -> String -> [(Language,[Tree])]
|
||||||
|
|
||||||
|
readTree :: MultiGrammar -> String -> Tree
|
||||||
|
showTree :: Tree -> String
|
||||||
|
|
||||||
|
languages :: MultiGrammar -> [Language]
|
||||||
|
categories :: MultiGrammar -> [Category]
|
||||||
|
|
||||||
|
startCat :: MultiGrammar -> Category
|
||||||
|
|
||||||
|
---------------------------------------------------
|
||||||
|
-- Implementation
|
||||||
|
---------------------------------------------------
|
||||||
|
|
||||||
|
file2grammar f =
|
||||||
|
readFileIf f >>= err (error "no parse") (return . mkGFCC) . pGrammar . myLexer
|
||||||
|
|
||||||
|
linearize mgr lang = GF.Canon.GFCC.DataGFCC.linearize mgr (CId lang)
|
||||||
|
|
||||||
|
|
||||||
|
parse mgr lang cat s = []
|
||||||
|
{-
|
||||||
|
map tree2exp .
|
||||||
|
errVal [] .
|
||||||
|
parseString (stateOptions sgr) sgr cfcat
|
||||||
|
where
|
||||||
|
sgr = stateGrammarOfLang mgr (zIdent lang)
|
||||||
|
cfcat = string2CFCat abs cat
|
||||||
|
abs = maybe (error "no abstract syntax") prIdent $ abstract mgr
|
||||||
|
-}
|
||||||
|
|
||||||
|
linearizeAll mgr = map snd . linearizeAllLang mgr
|
||||||
|
linearizeAllLang mgr t = [(lang,linearThis mgr lang t) | lang <- languages mgr]
|
||||||
|
|
||||||
|
{-
|
||||||
|
parseAll mgr cat = map snd . parseAllLang mgr cat
|
||||||
|
|
||||||
|
parseAllLang mgr cat s =
|
||||||
|
[(lang,ts) | lang <- languages mgr, let ts = parse mgr lang cat s, not (null ts)]
|
||||||
|
-}
|
||||||
|
|
||||||
|
readTree _ = err (const exp0) id . (pExp . myLexer)
|
||||||
|
|
||||||
|
showTree t = printTree t
|
||||||
|
|
||||||
|
languages mgr = [l | CId l <- cncnames mgr]
|
||||||
|
|
||||||
|
categories mgr = [c | CId c <- Map.keys (cats (abstract mgr))]
|
||||||
|
|
||||||
|
startCat mgr = "S" ----
|
||||||
|
|
||||||
|
------------ for internal use only
|
||||||
|
|
||||||
|
linearThis = GF.Canon.GFCC.GFCCAPI.linearize
|
||||||
|
|
||||||
|
err f g ex = case ex of
|
||||||
|
Ok x -> g x
|
||||||
|
Bad s -> f s
|
||||||
|
|
||||||
|
readFileIf f = do
|
||||||
|
b <- doesFileExist f
|
||||||
|
if b then readFile f
|
||||||
|
else putStrLn ("file " ++ f ++ " not found") >> return ""
|
||||||
@@ -448,14 +448,14 @@ happyReduction_42 happy_x_2
|
|||||||
happy_x_1
|
happy_x_1
|
||||||
= case happyOut24 happy_x_2 of { happy_var_2 ->
|
= case happyOut24 happy_x_2 of { happy_var_2 ->
|
||||||
happyIn36
|
happyIn36
|
||||||
(V happy_var_2
|
(V (fromInteger happy_var_2) --H
|
||||||
)}
|
)}
|
||||||
|
|
||||||
happyReduce_43 = happySpecReduce_1 13# happyReduction_43
|
happyReduce_43 = happySpecReduce_1 13# happyReduction_43
|
||||||
happyReduction_43 happy_x_1
|
happyReduction_43 happy_x_1
|
||||||
= case happyOut24 happy_x_1 of { happy_var_1 ->
|
= case happyOut24 happy_x_1 of { happy_var_1 ->
|
||||||
happyIn36
|
happyIn36
|
||||||
(C happy_var_1
|
(C (fromInteger happy_var_1) --H
|
||||||
)}
|
)}
|
||||||
|
|
||||||
happyReduce_44 = happySpecReduce_1 13# happyReduction_44
|
happyReduce_44 = happySpecReduce_1 13# happyReduction_44
|
||||||
|
|||||||
62
src/GF/Canon/GFCC/Shell.hs
Normal file
62
src/GF/Canon/GFCC/Shell.hs
Normal file
@@ -0,0 +1,62 @@
|
|||||||
|
module Main where
|
||||||
|
|
||||||
|
import GF.Canon.GFCC.GFCCAPI
|
||||||
|
import qualified GF.Canon.GFCC.GenGFCC as G ---
|
||||||
|
import GF.Canon.GFCC.AbsGFCC (CId(CId)) ---
|
||||||
|
import System.Random (newStdGen)
|
||||||
|
import System (getArgs)
|
||||||
|
|
||||||
|
|
||||||
|
-- Simple translation application built on GFCC. AR 7/9/2006 -- 19/9/2007
|
||||||
|
|
||||||
|
main :: IO ()
|
||||||
|
main = do
|
||||||
|
file:_ <- getArgs
|
||||||
|
grammar <- file2grammar file
|
||||||
|
putStrLn $ "languages: " ++ unwords (languages grammar)
|
||||||
|
putStrLn $ "categories: " ++ unwords (categories grammar)
|
||||||
|
loop grammar
|
||||||
|
|
||||||
|
loop :: MultiGrammar -> IO ()
|
||||||
|
loop grammar = do
|
||||||
|
s <- getLine
|
||||||
|
if s == "quit" then return () else do
|
||||||
|
treat grammar s
|
||||||
|
loop grammar
|
||||||
|
|
||||||
|
treat :: MultiGrammar -> String -> IO ()
|
||||||
|
treat grammar s = case words s of
|
||||||
|
"gt":cat:n:_ -> do
|
||||||
|
mapM_ prlinonly $ take (read n) $ G.generate grammar (CId cat)
|
||||||
|
"gtt":cat:n:_ -> do
|
||||||
|
mapM_ prlin $ take (read n) $ G.generate grammar (CId cat)
|
||||||
|
"gr":cat:n:_ -> do
|
||||||
|
gen <- newStdGen
|
||||||
|
mapM_ prlinonly $ take (read n) $ G.generateRandom gen grammar (CId cat)
|
||||||
|
"grt":cat:n:_ -> do
|
||||||
|
gen <- newStdGen
|
||||||
|
mapM_ prlin $ take (read n) $ G.generateRandom gen grammar (CId cat)
|
||||||
|
"p":lang:cat:ws -> do
|
||||||
|
let ts = parse grammar lang cat $ unwords ws
|
||||||
|
mapM_ (putStrLn . showTree) ts
|
||||||
|
"search":cat:n:ws -> do
|
||||||
|
case G.parse (read n) grammar (CId cat) ws of
|
||||||
|
t:_ -> prlin t
|
||||||
|
_ -> putStrLn "no parse found"
|
||||||
|
_ -> lins $ readTree grammar s
|
||||||
|
where
|
||||||
|
langs = languages grammar
|
||||||
|
lins t = mapM_ (lint t) $ langs
|
||||||
|
lint t lang = do
|
||||||
|
---- putStrLn $ showTree $ linExp grammar lang t
|
||||||
|
lin t lang
|
||||||
|
lin t lang = do
|
||||||
|
putStrLn $ linearize grammar lang t
|
||||||
|
prlins t = do
|
||||||
|
putStrLn $ showTree t
|
||||||
|
lins t
|
||||||
|
prlin t = do
|
||||||
|
putStrLn $ showTree t
|
||||||
|
prlinonly t
|
||||||
|
prlinonly t = mapM_ (lin t) $ langs
|
||||||
|
|
||||||
@@ -192,7 +192,7 @@ tools/$(GF_DOC_EXE): tools/GFDoc.hs
|
|||||||
$(GHMAKE) $(GHCOPTFLAGS) -o $@ $^
|
$(GHMAKE) $(GHCOPTFLAGS) -o $@ $^
|
||||||
|
|
||||||
gfcc:
|
gfcc:
|
||||||
$(GHMAKE) $(GHCOPTFLAGS) -o gfcc GF/Canon/GFCC/RunGFCC.hs
|
$(GHMAKE) $(GHCOPTFLAGS) -o gfcc GF/Canon/GFCC/Shell.hs
|
||||||
strip gfcc
|
strip gfcc
|
||||||
mv gfcc ../bin/
|
mv gfcc ../bin/
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user