mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-13 06:49:31 -06:00
started extending GFCC API with parsing
This commit is contained in:
171
src/GF/Canon/GFCC/FCFGParsing.hs
Normal file
171
src/GF/Canon/GFCC/FCFGParsing.hs
Normal file
@@ -0,0 +1,171 @@
|
||||
module GF.Canon.GFCC.FCFGParsing where
|
||||
|
||||
import GF.Canon.GFCC.DataGFCC
|
||||
import GF.Canon.GFCC.AbsGFCC
|
||||
import GF.Conversion.SimpleToFCFG (convertGrammar)
|
||||
|
||||
--import GF.System.Tracing
|
||||
--import GF.Infra.Print
|
||||
--import qualified GF.Grammar.PrGrammar as PrGrammar
|
||||
|
||||
--import GF.Data.Operations (Err(..))
|
||||
|
||||
--import qualified GF.Grammar.Grammar as Grammar
|
||||
--import qualified GF.Grammar.Macros as Macros
|
||||
--import qualified GF.Canon.AbsGFC as AbsGFC
|
||||
--import qualified GF.Canon.GFCC.AbsGFCC as AbsGFCC
|
||||
--import qualified GF.Infra.Ident as Ident
|
||||
--import GF.CF.CFIdent (CFCat, cfCat2Ident, CFTok, wordsCFTok)
|
||||
|
||||
import GF.Data.SortedList
|
||||
import GF.Data.Assoc
|
||||
import GF.Formalism.Utilities --(forest2trees)
|
||||
|
||||
--import GF.Conversion.Types
|
||||
|
||||
import GF.Formalism.FCFG
|
||||
--import qualified GF.Formalism.GCFG as G
|
||||
--import qualified GF.Formalism.SimpleGFC as S
|
||||
--import qualified GF.Formalism.MCFG as M
|
||||
--import qualified GF.Formalism.CFG as C
|
||||
--import qualified GF.Parsing.MCFG as PM
|
||||
import qualified GF.Parsing.FCFG as PF
|
||||
--import qualified GF.Parsing.CFG as PC
|
||||
import GF.Canon.GFCC.ErrM
|
||||
|
||||
|
||||
--convertGrammar :: Grammar -> [(Ident,FGrammar)]
|
||||
|
||||
--import qualified GF.Parsing.GFC as New
|
||||
--checkErr $ New.parse algorithm strategy (pInfo sg) (absId sg) cat toks
|
||||
-- algorithm "f"
|
||||
-- strategy "bottomup"
|
||||
|
||||
type Token = String ----
|
||||
type CFTok = String ----
|
||||
type CFCat = CId ----
|
||||
type Fun = CId ----
|
||||
|
||||
cfCat2Ident = id ----
|
||||
|
||||
wordsCFTok :: CFTok -> [String]
|
||||
wordsCFTok = return ----
|
||||
|
||||
|
||||
type FCFPInfo = PF.FCFPInfo FCat FName Token
|
||||
|
||||
-- main parsing function
|
||||
|
||||
parse ::
|
||||
-- String -> -- ^ parsing algorithm (mcfg or cfg)
|
||||
-- String -> -- ^ parsing strategy
|
||||
FCFPInfo -> -- ^ compiled grammar (fcfg)
|
||||
-- Ident.Ident -> -- ^ abstract module name
|
||||
CFCat -> -- ^ starting category
|
||||
[CFTok] -> -- ^ input tokens
|
||||
Err [Exp] -- ^ resulting GF terms
|
||||
|
||||
parse pinfo startCat inString =
|
||||
|
||||
do let inTokens = inputMany (map wordsCFTok inString)
|
||||
forests <- selectParser pinfo startCat inTokens
|
||||
let filteredForests = forests >>= applyProfileToForest
|
||||
trees = nubsort $ filteredForests >>= forest2trees
|
||||
|
||||
return $ map tree2term trees
|
||||
|
||||
|
||||
-- parsing via FCFG
|
||||
selectParser pinfo startCat inTokens
|
||||
= do let startCats = filter isStart $ PF.grammarCats fcfpi
|
||||
isStart cat = cat' == cfCat2Ident startCat
|
||||
where CId x = fcat2cid cat
|
||||
cat' = CId x
|
||||
fcfpi = pinfo
|
||||
fcfParser <- PF.parseFCF "bottomup"
|
||||
let chart = fcfParser fcfpi startCats inTokens
|
||||
(i,j) = inputBounds inTokens
|
||||
finalEdges = [PF.makeFinalEdge cat i j | cat <- startCats]
|
||||
return $ map cnv_forests $ chart2forests chart (const False) finalEdges
|
||||
|
||||
cnv_forests FMeta = FMeta
|
||||
cnv_forests (FNode (Name (CId n) p) fss) = FNode (Name (CId n) (map cnv_profile p)) (map (map cnv_forests) fss)
|
||||
cnv_forests (FString x) = FString x
|
||||
cnv_forests (FInt x) = FInt x
|
||||
cnv_forests (FFloat x) = FFloat x
|
||||
|
||||
cnv_profile (Unify x) = Unify x
|
||||
cnv_profile (Constant x) = Constant (cnv_forests2 x)
|
||||
|
||||
cnv_forests2 FMeta = FMeta
|
||||
cnv_forests2 (FNode (CId n) fss) = FNode (CId n) (map (map cnv_forests2) fss)
|
||||
cnv_forests2 (FString x) = FString x
|
||||
cnv_forests2 (FInt x) = FInt x
|
||||
cnv_forests2 (FFloat x) = FFloat x
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- parse trees to GFCC terms
|
||||
|
||||
tree2term :: SyntaxTree Fun -> Exp
|
||||
tree2term (TNode f ts) = Tr (AC (CId f)) (map tree2term ts)
|
||||
{- ----
|
||||
tree2term (TString s) = Macros.string2term s
|
||||
tree2term (TInt n) = Macros.int2term n
|
||||
tree2term (TFloat f) = Macros.float2term f
|
||||
tree2term (TMeta) = Macros.mkMeta 0
|
||||
-}
|
||||
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- conversion and unification of forests
|
||||
|
||||
-- simplest implementation
|
||||
applyProfileToForest :: SyntaxForest Name -> [SyntaxForest Fun]
|
||||
applyProfileToForest (FNode name@(Name fun profile) children)
|
||||
| isCoercion name = concat chForests
|
||||
| otherwise = [ FNode fun chForests | not (null chForests) ]
|
||||
where chForests = concat [ applyProfileM unifyManyForests profile forests |
|
||||
forests0 <- children,
|
||||
forests <- mapM applyProfileToForest forests0 ]
|
||||
applyProfileToForest (FString s) = [FString s]
|
||||
applyProfileToForest (FInt n) = [FInt n]
|
||||
applyProfileToForest (FFloat f) = [FFloat f]
|
||||
applyProfileToForest (FMeta) = [FMeta]
|
||||
|
||||
|
||||
--------------------- From parsing types ------------------------------
|
||||
|
||||
-- * fast nonerasing MCFG
|
||||
|
||||
type FIndex = Int
|
||||
type FPath = [FIndex]
|
||||
type FName = NameProfile CId
|
||||
type FGrammar = FCFGrammar FCat FName Token
|
||||
type FRule = FCFRule FCat FName Token
|
||||
data FCat = FCat {-# UNPACK #-} !Int CId [FPath] [(FPath,FIndex)]
|
||||
|
||||
initialFCat :: CId -> FCat
|
||||
initialFCat cat = FCat 0 cat [] []
|
||||
|
||||
fcatString = FCat (-1) (CId "String") [[0]] []
|
||||
fcatInt = FCat (-2) (CId "Int") [[0]] []
|
||||
fcatFloat = FCat (-3) (CId "Float") [[0]] []
|
||||
|
||||
fcat2cid :: FCat -> CId
|
||||
fcat2cid (FCat _ c _ _) = c
|
||||
|
||||
instance Eq FCat where
|
||||
(FCat id1 _ _ _) == (FCat id2 _ _ _) = id1 == id2
|
||||
|
||||
instance Ord FCat where
|
||||
compare (FCat id1 _ _ _) (FCat id2 _ _ _) = compare id1 id2
|
||||
|
||||
|
||||
|
||||
---
|
||||
|
||||
isCoercion :: Name -> Bool
|
||||
isCoercion (Name fun [Unify [0]]) = False -- isWildIdent fun
|
||||
isCoercion _ = False
|
||||
|
||||
type Name = NameProfile Fun
|
||||
115
src/GF/Canon/GFCC/GFCCAPI.hs
Normal file
115
src/GF/Canon/GFCC/GFCCAPI.hs
Normal file
@@ -0,0 +1,115 @@
|
||||
----------------------------------------------------------------------
|
||||
-- |
|
||||
-- Module : GFCCAPI
|
||||
-- Maintainer : Aarne Ranta
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date:
|
||||
-- > CVS $Author:
|
||||
-- > CVS $Revision:
|
||||
--
|
||||
-- Reduced Application Programmer's Interface to GF, meant for
|
||||
-- embedded GF systems. AR 19/9/2007
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module GF.Canon.GFCC.GFCCAPI where
|
||||
|
||||
import GF.Canon.GFCC.DataGFCC
|
||||
--import GF.Canon.GFCC.GenGFCC
|
||||
import GF.Canon.GFCC.AbsGFCC
|
||||
import GF.Canon.GFCC.ParGFCC
|
||||
import GF.Canon.GFCC.PrintGFCC
|
||||
import GF.Canon.GFCC.ErrM
|
||||
--import GF.Data.Operations
|
||||
--import GF.Infra.UseIO
|
||||
import qualified Data.Map as Map
|
||||
import System.Random (newStdGen)
|
||||
import System.Directory (doesFileExist)
|
||||
import System
|
||||
|
||||
-- This API is meant to be used when embedding GF grammars in Haskell
|
||||
-- programs. The embedded system is supposed to use the
|
||||
-- .gfcm grammar format, which is first produced by the gf program.
|
||||
|
||||
---------------------------------------------------
|
||||
-- Interface
|
||||
---------------------------------------------------
|
||||
|
||||
type MultiGrammar = GFCC
|
||||
type Language = String
|
||||
type Category = String
|
||||
type Tree = Exp
|
||||
|
||||
file2grammar :: FilePath -> IO MultiGrammar
|
||||
|
||||
linearize :: MultiGrammar -> Language -> Tree -> String
|
||||
parse :: MultiGrammar -> Language -> Category -> String -> [Tree]
|
||||
|
||||
linearizeAll :: MultiGrammar -> Tree -> [String]
|
||||
linearizeAllLang :: MultiGrammar -> Tree -> [(Language,String)]
|
||||
|
||||
--parseAll :: MultiGrammar -> Category -> String -> [[Tree]]
|
||||
--parseAllLang :: MultiGrammar -> Category -> String -> [(Language,[Tree])]
|
||||
|
||||
readTree :: MultiGrammar -> String -> Tree
|
||||
showTree :: Tree -> String
|
||||
|
||||
languages :: MultiGrammar -> [Language]
|
||||
categories :: MultiGrammar -> [Category]
|
||||
|
||||
startCat :: MultiGrammar -> Category
|
||||
|
||||
---------------------------------------------------
|
||||
-- Implementation
|
||||
---------------------------------------------------
|
||||
|
||||
file2grammar f =
|
||||
readFileIf f >>= err (error "no parse") (return . mkGFCC) . pGrammar . myLexer
|
||||
|
||||
linearize mgr lang = GF.Canon.GFCC.DataGFCC.linearize mgr (CId lang)
|
||||
|
||||
|
||||
parse mgr lang cat s = []
|
||||
{-
|
||||
map tree2exp .
|
||||
errVal [] .
|
||||
parseString (stateOptions sgr) sgr cfcat
|
||||
where
|
||||
sgr = stateGrammarOfLang mgr (zIdent lang)
|
||||
cfcat = string2CFCat abs cat
|
||||
abs = maybe (error "no abstract syntax") prIdent $ abstract mgr
|
||||
-}
|
||||
|
||||
linearizeAll mgr = map snd . linearizeAllLang mgr
|
||||
linearizeAllLang mgr t = [(lang,linearThis mgr lang t) | lang <- languages mgr]
|
||||
|
||||
{-
|
||||
parseAll mgr cat = map snd . parseAllLang mgr cat
|
||||
|
||||
parseAllLang mgr cat s =
|
||||
[(lang,ts) | lang <- languages mgr, let ts = parse mgr lang cat s, not (null ts)]
|
||||
-}
|
||||
|
||||
readTree _ = err (const exp0) id . (pExp . myLexer)
|
||||
|
||||
showTree t = printTree t
|
||||
|
||||
languages mgr = [l | CId l <- cncnames mgr]
|
||||
|
||||
categories mgr = [c | CId c <- Map.keys (cats (abstract mgr))]
|
||||
|
||||
startCat mgr = "S" ----
|
||||
|
||||
------------ for internal use only
|
||||
|
||||
linearThis = GF.Canon.GFCC.GFCCAPI.linearize
|
||||
|
||||
err f g ex = case ex of
|
||||
Ok x -> g x
|
||||
Bad s -> f s
|
||||
|
||||
readFileIf f = do
|
||||
b <- doesFileExist f
|
||||
if b then readFile f
|
||||
else putStrLn ("file " ++ f ++ " not found") >> return ""
|
||||
@@ -448,14 +448,14 @@ happyReduction_42 happy_x_2
|
||||
happy_x_1
|
||||
= case happyOut24 happy_x_2 of { happy_var_2 ->
|
||||
happyIn36
|
||||
(V happy_var_2
|
||||
(V (fromInteger happy_var_2) --H
|
||||
)}
|
||||
|
||||
happyReduce_43 = happySpecReduce_1 13# happyReduction_43
|
||||
happyReduction_43 happy_x_1
|
||||
= case happyOut24 happy_x_1 of { happy_var_1 ->
|
||||
happyIn36
|
||||
(C happy_var_1
|
||||
(C (fromInteger happy_var_1) --H
|
||||
)}
|
||||
|
||||
happyReduce_44 = happySpecReduce_1 13# happyReduction_44
|
||||
|
||||
62
src/GF/Canon/GFCC/Shell.hs
Normal file
62
src/GF/Canon/GFCC/Shell.hs
Normal file
@@ -0,0 +1,62 @@
|
||||
module Main where
|
||||
|
||||
import GF.Canon.GFCC.GFCCAPI
|
||||
import qualified GF.Canon.GFCC.GenGFCC as G ---
|
||||
import GF.Canon.GFCC.AbsGFCC (CId(CId)) ---
|
||||
import System.Random (newStdGen)
|
||||
import System (getArgs)
|
||||
|
||||
|
||||
-- Simple translation application built on GFCC. AR 7/9/2006 -- 19/9/2007
|
||||
|
||||
main :: IO ()
|
||||
main = do
|
||||
file:_ <- getArgs
|
||||
grammar <- file2grammar file
|
||||
putStrLn $ "languages: " ++ unwords (languages grammar)
|
||||
putStrLn $ "categories: " ++ unwords (categories grammar)
|
||||
loop grammar
|
||||
|
||||
loop :: MultiGrammar -> IO ()
|
||||
loop grammar = do
|
||||
s <- getLine
|
||||
if s == "quit" then return () else do
|
||||
treat grammar s
|
||||
loop grammar
|
||||
|
||||
treat :: MultiGrammar -> String -> IO ()
|
||||
treat grammar s = case words s of
|
||||
"gt":cat:n:_ -> do
|
||||
mapM_ prlinonly $ take (read n) $ G.generate grammar (CId cat)
|
||||
"gtt":cat:n:_ -> do
|
||||
mapM_ prlin $ take (read n) $ G.generate grammar (CId cat)
|
||||
"gr":cat:n:_ -> do
|
||||
gen <- newStdGen
|
||||
mapM_ prlinonly $ take (read n) $ G.generateRandom gen grammar (CId cat)
|
||||
"grt":cat:n:_ -> do
|
||||
gen <- newStdGen
|
||||
mapM_ prlin $ take (read n) $ G.generateRandom gen grammar (CId cat)
|
||||
"p":lang:cat:ws -> do
|
||||
let ts = parse grammar lang cat $ unwords ws
|
||||
mapM_ (putStrLn . showTree) ts
|
||||
"search":cat:n:ws -> do
|
||||
case G.parse (read n) grammar (CId cat) ws of
|
||||
t:_ -> prlin t
|
||||
_ -> putStrLn "no parse found"
|
||||
_ -> lins $ readTree grammar s
|
||||
where
|
||||
langs = languages grammar
|
||||
lins t = mapM_ (lint t) $ langs
|
||||
lint t lang = do
|
||||
---- putStrLn $ showTree $ linExp grammar lang t
|
||||
lin t lang
|
||||
lin t lang = do
|
||||
putStrLn $ linearize grammar lang t
|
||||
prlins t = do
|
||||
putStrLn $ showTree t
|
||||
lins t
|
||||
prlin t = do
|
||||
putStrLn $ showTree t
|
||||
prlinonly t
|
||||
prlinonly t = mapM_ (lin t) $ langs
|
||||
|
||||
@@ -192,7 +192,7 @@ tools/$(GF_DOC_EXE): tools/GFDoc.hs
|
||||
$(GHMAKE) $(GHCOPTFLAGS) -o $@ $^
|
||||
|
||||
gfcc:
|
||||
$(GHMAKE) $(GHCOPTFLAGS) -o gfcc GF/Canon/GFCC/RunGFCC.hs
|
||||
$(GHMAKE) $(GHCOPTFLAGS) -o gfcc GF/Canon/GFCC/Shell.hs
|
||||
strip gfcc
|
||||
mv gfcc ../bin/
|
||||
|
||||
|
||||
Reference in New Issue
Block a user