started extending GFCC API with parsing

2026-07-16 10:32:46 -06:00 · 2007-09-19 16:48:13 +00:00
parent 6dcd33ec26
commit 7324597039
5 changed files with 351 additions and 3 deletions
@@ -0,0 +1,171 @@
+module GF.Canon.GFCC.FCFGParsing where
+
+import GF.Canon.GFCC.DataGFCC
+import GF.Canon.GFCC.AbsGFCC
+import GF.Conversion.SimpleToFCFG (convertGrammar)
+
+--import GF.System.Tracing 
+--import GF.Infra.Print
+--import qualified GF.Grammar.PrGrammar as PrGrammar
+
+--import GF.Data.Operations (Err(..))
+
+--import qualified GF.Grammar.Grammar as Grammar
+--import qualified GF.Grammar.Macros as Macros
+--import qualified GF.Canon.AbsGFC as AbsGFC
+--import qualified GF.Canon.GFCC.AbsGFCC as AbsGFCC
+--import qualified GF.Infra.Ident as Ident
+--import GF.CF.CFIdent (CFCat, cfCat2Ident, CFTok, wordsCFTok)
+
+import GF.Data.SortedList 
+import GF.Data.Assoc
+import GF.Formalism.Utilities --(forest2trees)
+
+--import GF.Conversion.Types
+
+import GF.Formalism.FCFG
+--import qualified GF.Formalism.GCFG as G
+--import qualified GF.Formalism.SimpleGFC as S
+--import qualified GF.Formalism.MCFG as M
+--import qualified GF.Formalism.CFG as C
+--import qualified GF.Parsing.MCFG as PM
+import qualified GF.Parsing.FCFG as PF
+--import qualified GF.Parsing.CFG as PC
+import GF.Canon.GFCC.ErrM
+
+
+--convertGrammar :: Grammar -> [(Ident,FGrammar)]
+
+--import qualified GF.Parsing.GFC as New
+--checkErr $ New.parse algorithm strategy (pInfo sg) (absId sg) cat toks
+-- algorithm "f"
+-- strategy "bottomup"
+
+type Token = String ----
+type CFTok = String ----
+type CFCat = CId ----
+type Fun = CId ----
+
+cfCat2Ident = id ----
+
+wordsCFTok :: CFTok -> [String]
+wordsCFTok = return ----
+
+
+type FCFPInfo = PF.FCFPInfo FCat FName Token
+
+-- main parsing function
+
+parse :: 
+--         String ->      -- ^ parsing algorithm (mcfg or cfg)
+--         String ->      -- ^ parsing strategy
+      FCFPInfo ->       -- ^ compiled grammar (fcfg) 
+--      Ident.Ident ->    -- ^ abstract module name
+      CFCat ->          -- ^ starting category
+      [CFTok] ->        -- ^ input tokens
+      Err [Exp]         -- ^ resulting GF terms
+
+parse pinfo startCat inString = 
+
+    do let inTokens = inputMany (map wordsCFTok inString)
+       forests <- selectParser pinfo startCat inTokens
+       let filteredForests = forests >>= applyProfileToForest
+	   trees = nubsort $ filteredForests >>= forest2trees
+
+       return $ map tree2term trees
+
+
+-- parsing via FCFG
+selectParser pinfo startCat inTokens
+    = do let startCats = filter isStart $ PF.grammarCats fcfpi
+	     isStart cat = cat' == cfCat2Ident startCat
+	       where CId x = fcat2cid cat
+	             cat'  = CId x
+	     fcfpi = pinfo
+	 fcfParser <- PF.parseFCF "bottomup"
+	 let chart = fcfParser fcfpi startCats inTokens
+	     (i,j) = inputBounds inTokens
+	     finalEdges = [PF.makeFinalEdge cat i j | cat <- startCats]
+	 return $ map cnv_forests $ chart2forests chart (const False) finalEdges
+
+cnv_forests FMeta         = FMeta
+cnv_forests (FNode (Name (CId n) p) fss) = FNode (Name (CId n) (map cnv_profile p)) (map (map cnv_forests) fss)
+cnv_forests (FString x)   = FString x
+cnv_forests (FInt    x)   = FInt    x
+cnv_forests (FFloat  x)   = FFloat  x
+
+cnv_profile (Unify    x) = Unify x
+cnv_profile (Constant x) = Constant (cnv_forests2 x)
+
+cnv_forests2 FMeta         = FMeta
+cnv_forests2 (FNode (CId n) fss) = FNode (CId n) (map (map cnv_forests2) fss)
+cnv_forests2 (FString x)   = FString x
+cnv_forests2 (FInt    x)   = FInt    x
+cnv_forests2 (FFloat  x)   = FFloat  x
+
+----------------------------------------------------------------------
+-- parse trees to GFCC terms
+
+tree2term :: SyntaxTree Fun -> Exp
+tree2term (TNode f ts) = Tr (AC (CId f)) (map tree2term ts)
+{- ----
+tree2term (TString  s) = Macros.string2term s
+tree2term (TInt     n) = Macros.int2term    n
+tree2term (TFloat   f) = Macros.float2term  f
+tree2term (TMeta)      = Macros.mkMeta 0
+-}
+
+
+----------------------------------------------------------------------
+-- conversion and unification of forests
+
+-- simplest implementation
+applyProfileToForest :: SyntaxForest Name -> [SyntaxForest Fun]
+applyProfileToForest (FNode name@(Name fun profile) children) 
+    | isCoercion name = concat chForests
+    | otherwise       = [ FNode fun chForests | not (null chForests) ]
+    where chForests   = concat [ applyProfileM unifyManyForests profile forests |
+				 forests0 <- children,
+				 forests <- mapM applyProfileToForest forests0 ]
+applyProfileToForest (FString s) = [FString s]
+applyProfileToForest (FInt    n) = [FInt    n]
+applyProfileToForest (FFloat  f) = [FFloat  f]
+applyProfileToForest (FMeta)     = [FMeta]
+
+
+--------------------- From parsing types ------------------------------
+
+-- * fast nonerasing MCFG
+
+type FIndex   = Int
+type FPath    = [FIndex]
+type FName    = NameProfile CId
+type FGrammar = FCFGrammar FCat FName Token
+type FRule    = FCFRule    FCat FName Token
+data FCat     = FCat  {-# UNPACK #-} !Int CId [FPath] [(FPath,FIndex)]
+
+initialFCat :: CId -> FCat
+initialFCat cat = FCat 0 cat [] []
+
+fcatString = FCat (-1) (CId "String") [[0]] []
+fcatInt    = FCat (-2) (CId "Int")    [[0]] []
+fcatFloat  = FCat (-3) (CId "Float")  [[0]] []
+
+fcat2cid :: FCat -> CId
+fcat2cid (FCat _ c _ _) = c
+
+instance Eq FCat where
+  (FCat id1 _ _ _) == (FCat id2 _ _ _) = id1 == id2
+
+instance Ord FCat where
+  compare (FCat id1 _ _ _) (FCat id2 _ _ _) = compare id1 id2
+
+
+
+---
+
+isCoercion :: Name -> Bool
+isCoercion (Name fun [Unify [0]]) = False -- isWildIdent fun
+isCoercion _ = False
+
+type Name = NameProfile Fun
@@ -0,0 +1,115 @@
+----------------------------------------------------------------------
+-- |
+-- Module      : GFCCAPI
+-- Maintainer  : Aarne Ranta
+-- Stability   : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 
+-- > CVS $Author: 
+-- > CVS $Revision: 
+--
+-- Reduced Application Programmer's Interface to GF, meant for
+-- embedded GF systems. AR 19/9/2007
+-----------------------------------------------------------------------------
+
+module  GF.Canon.GFCC.GFCCAPI where
+
+import GF.Canon.GFCC.DataGFCC
+--import GF.Canon.GFCC.GenGFCC
+import GF.Canon.GFCC.AbsGFCC
+import GF.Canon.GFCC.ParGFCC
+import GF.Canon.GFCC.PrintGFCC
+import GF.Canon.GFCC.ErrM
+--import GF.Data.Operations
+--import GF.Infra.UseIO
+import qualified Data.Map as Map
+import System.Random (newStdGen)
+import System.Directory (doesFileExist)
+import System
+
+-- This API is meant to be used when embedding GF grammars in Haskell 
+-- programs. The embedded system is supposed to use the
+-- .gfcm grammar format, which is first produced by the gf program.
+
+---------------------------------------------------
+-- Interface
+---------------------------------------------------
+
+type MultiGrammar = GFCC
+type Language     = String
+type Category     = String
+type Tree         = Exp
+
+file2grammar :: FilePath -> IO MultiGrammar
+
+linearize    :: MultiGrammar -> Language -> Tree -> String
+parse        :: MultiGrammar -> Language -> Category -> String -> [Tree]
+
+linearizeAll     :: MultiGrammar -> Tree -> [String]
+linearizeAllLang :: MultiGrammar -> Tree -> [(Language,String)]
+
+--parseAll     :: MultiGrammar -> Category -> String -> [[Tree]]
+--parseAllLang :: MultiGrammar -> Category -> String -> [(Language,[Tree])]
+
+readTree   :: MultiGrammar -> String -> Tree
+showTree   ::                 Tree -> String
+
+languages  :: MultiGrammar -> [Language]
+categories :: MultiGrammar -> [Category]
+
+startCat   :: MultiGrammar -> Category
+
+---------------------------------------------------
+-- Implementation
+---------------------------------------------------
+
+file2grammar f =
+  readFileIf f >>= err (error "no parse") (return . mkGFCC) . pGrammar . myLexer
+
+linearize mgr lang = GF.Canon.GFCC.DataGFCC.linearize mgr (CId lang)
+
+
+parse mgr lang cat s = [] 
+{-
+  map tree2exp . 
+  errVal [] . 
+  parseString (stateOptions sgr) sgr cfcat
+ where
+   sgr   = stateGrammarOfLang mgr (zIdent lang)
+   cfcat = string2CFCat abs cat
+   abs   = maybe (error "no abstract syntax") prIdent $ abstract mgr
+-}
+
+linearizeAll mgr = map snd . linearizeAllLang mgr
+linearizeAllLang mgr t = [(lang,linearThis mgr lang t) | lang <- languages mgr]
+
+{-
+parseAll mgr cat = map snd . parseAllLang mgr cat
+
+parseAllLang mgr cat s = 
+  [(lang,ts) | lang <- languages mgr, let ts = parse mgr lang cat s, not (null ts)]
+-}
+
+readTree _ = err (const exp0) id . (pExp . myLexer)
+
+showTree t = printTree t
+
+languages mgr = [l | CId l <- cncnames mgr]
+
+categories mgr = [c | CId c <- Map.keys (cats (abstract mgr))]
+
+startCat mgr = "S" ----
+
+------------ for internal use only
+
+linearThis = GF.Canon.GFCC.GFCCAPI.linearize
+
+err f g ex = case ex of
+  Ok x -> g x
+  Bad s -> f s
+
+readFileIf f = do
+  b <- doesFileExist f
+  if b then readFile f 
+       else putStrLn ("file " ++ f ++ " not found") >> return ""
@@ -448,14 +448,14 @@ happyReduction_42 happy_x_2
 	happy_x_1
 	 =  case happyOut24 happy_x_2 of { happy_var_2 -> 
 	happyIn36
-		 (V happy_var_2
+		 (V (fromInteger happy_var_2) --H
 	)}

 happyReduce_43 = happySpecReduce_1 13# happyReduction_43
 happyReduction_43 happy_x_1
 	 =  case happyOut24 happy_x_1 of { happy_var_1 -> 
 	happyIn36
-		 (C happy_var_1
+		 (C (fromInteger happy_var_1) --H
 	)}

 happyReduce_44 = happySpecReduce_1 13# happyReduction_44
@@ -0,0 +1,62 @@
+module Main where
+
+import GF.Canon.GFCC.GFCCAPI
+import qualified GF.Canon.GFCC.GenGFCC as G ---
+import GF.Canon.GFCC.AbsGFCC (CId(CId)) ---
+import System.Random (newStdGen)
+import System (getArgs)
+
+
+-- Simple translation application built on GFCC. AR 7/9/2006 -- 19/9/2007
+
+main :: IO ()
+main = do
+  file:_  <- getArgs
+  grammar <- file2grammar file
+  putStrLn $ "languages:  " ++ unwords (languages grammar)
+  putStrLn $ "categories: " ++ unwords (categories grammar)
+  loop grammar
+
+loop :: MultiGrammar -> IO ()
+loop grammar = do
+  s <- getLine
+  if s == "quit" then return () else do
+    treat grammar s
+    loop grammar
+
+treat :: MultiGrammar -> String -> IO ()
+treat grammar s = case words s of
+  "gt":cat:n:_ -> do
+    mapM_ prlinonly $ take (read n) $ G.generate grammar (CId cat)
+  "gtt":cat:n:_ -> do
+    mapM_ prlin $ take (read n) $ G.generate grammar (CId cat)
+  "gr":cat:n:_ -> do
+    gen <- newStdGen
+    mapM_ prlinonly $ take (read n) $ G.generateRandom gen grammar (CId cat)
+  "grt":cat:n:_ -> do
+    gen <- newStdGen
+    mapM_ prlin $ take (read n) $ G.generateRandom gen grammar (CId cat)
+  "p":lang:cat:ws -> do
+    let ts = parse grammar lang cat $ unwords ws
+    mapM_ (putStrLn . showTree) ts 
+  "search":cat:n:ws -> do
+    case G.parse (read n) grammar (CId cat) ws of
+      t:_ -> prlin t
+      _ -> putStrLn "no parse found" 
+  _ -> lins $ readTree grammar s
+ where
+  langs = languages grammar
+  lins t = mapM_ (lint t) $ langs 
+  lint t lang = do
+----    putStrLn $ showTree $ linExp grammar lang t 
+    lin t lang
+  lin t lang = do
+    putStrLn $ linearize grammar lang t
+  prlins t = do
+    putStrLn $ showTree t
+    lins t
+  prlin t = do
+    putStrLn $ showTree t
+    prlinonly t
+  prlinonly t = mapM_ (lin t) $ langs
+
@@ -192,7 +192,7 @@ tools/$(GF_DOC_EXE): tools/GFDoc.hs
 	$(GHMAKE) $(GHCOPTFLAGS) -o $@ $^

 gfcc:
-	$(GHMAKE) $(GHCOPTFLAGS) -o gfcc GF/Canon/GFCC/RunGFCC.hs
+	$(GHMAKE) $(GHCOPTFLAGS) -o gfcc GF/Canon/GFCC/Shell.hs
 	strip gfcc
 	mv gfcc ../bin/