started extending GFCC API with parsing

2026-04-23 19:42:50 -06:00 · 2007-09-19 16:48:13 +00:00
parent aff4aa20c1
commit ef389db569
5 changed files with 351 additions and 3 deletions
--- a/src/GF/Canon/GFCC/FCFGParsing.hs
+++ b/src/GF/Canon/GFCC/FCFGParsing.hs
@@ -0,0 +1,171 @@
 module GF.Canon.GFCC.FCFGParsing where
 import GF.Canon.GFCC.DataGFCC
 import GF.Canon.GFCC.AbsGFCC
 import GF.Conversion.SimpleToFCFG (convertGrammar)
 --import GF.System.Tracing 
 --import GF.Infra.Print
 --import qualified GF.Grammar.PrGrammar as PrGrammar
 --import GF.Data.Operations (Err(..))
 --import qualified GF.Grammar.Grammar as Grammar
 --import qualified GF.Grammar.Macros as Macros
 --import qualified GF.Canon.AbsGFC as AbsGFC
 --import qualified GF.Canon.GFCC.AbsGFCC as AbsGFCC
 --import qualified GF.Infra.Ident as Ident
 --import GF.CF.CFIdent (CFCat, cfCat2Ident, CFTok, wordsCFTok)
 import GF.Data.SortedList 
 import GF.Data.Assoc
 import GF.Formalism.Utilities --(forest2trees)
 --import GF.Conversion.Types
 import GF.Formalism.FCFG
 --import qualified GF.Formalism.GCFG as G
 --import qualified GF.Formalism.SimpleGFC as S
 --import qualified GF.Formalism.MCFG as M
 --import qualified GF.Formalism.CFG as C
 --import qualified GF.Parsing.MCFG as PM
 import qualified GF.Parsing.FCFG as PF
 --import qualified GF.Parsing.CFG as PC
 import GF.Canon.GFCC.ErrM
 --convertGrammar :: Grammar -> [(Ident,FGrammar)]
 --import qualified GF.Parsing.GFC as New
 --checkErr $ New.parse algorithm strategy (pInfo sg) (absId sg) cat toks
 -- algorithm "f"
 -- strategy "bottomup"
 type Token = String ----
 type CFTok = String ----
 type CFCat = CId ----
 type Fun = CId ----
 cfCat2Ident = id ----
 wordsCFTok :: CFTok -> [String]
 wordsCFTok = return ----
 type FCFPInfo = PF.FCFPInfo FCat FName Token
 -- main parsing function
 parse :: 
 --         String ->      -- ^ parsing algorithm (mcfg or cfg)
 --         String ->      -- ^ parsing strategy
      FCFPInfo ->       -- ^ compiled grammar (fcfg) 
 --      Ident.Ident ->    -- ^ abstract module name
      CFCat ->          -- ^ starting category
      [CFTok] ->        -- ^ input tokens
      Err [Exp]         -- ^ resulting GF terms
 parse pinfo startCat inString = 
    do let inTokens = inputMany (map wordsCFTok inString)
       forests <- selectParser pinfo startCat inTokens
       let filteredForests = forests >>= applyProfileToForest
 	   trees = nubsort $ filteredForests >>= forest2trees
       return $ map tree2term trees
 -- parsing via FCFG
 selectParser pinfo startCat inTokens
    = do let startCats = filter isStart $ PF.grammarCats fcfpi
 	     isStart cat = cat' == cfCat2Ident startCat
 	       where CId x = fcat2cid cat
 	             cat'  = CId x
 	     fcfpi = pinfo
 	 fcfParser <- PF.parseFCF "bottomup"
 	 let chart = fcfParser fcfpi startCats inTokens
 	     (i,j) = inputBounds inTokens
 	     finalEdges = [PF.makeFinalEdge cat i j | cat <- startCats]
 	 return $ map cnv_forests $ chart2forests chart (const False) finalEdges
 cnv_forests FMeta         = FMeta
 cnv_forests (FNode (Name (CId n) p) fss) = FNode (Name (CId n) (map cnv_profile p)) (map (map cnv_forests) fss)
 cnv_forests (FString x)   = FString x
 cnv_forests (FInt    x)   = FInt    x
 cnv_forests (FFloat  x)   = FFloat  x
 cnv_profile (Unify    x) = Unify x
 cnv_profile (Constant x) = Constant (cnv_forests2 x)
 cnv_forests2 FMeta         = FMeta
 cnv_forests2 (FNode (CId n) fss) = FNode (CId n) (map (map cnv_forests2) fss)
 cnv_forests2 (FString x)   = FString x
 cnv_forests2 (FInt    x)   = FInt    x
 cnv_forests2 (FFloat  x)   = FFloat  x
 ----------------------------------------------------------------------
 -- parse trees to GFCC terms
 tree2term :: SyntaxTree Fun -> Exp
 tree2term (TNode f ts) = Tr (AC (CId f)) (map tree2term ts)
 {- ----
 tree2term (TString  s) = Macros.string2term s
 tree2term (TInt     n) = Macros.int2term    n
 tree2term (TFloat   f) = Macros.float2term  f
 tree2term (TMeta)      = Macros.mkMeta 0
 -}
 ----------------------------------------------------------------------
 -- conversion and unification of forests
 -- simplest implementation
 applyProfileToForest :: SyntaxForest Name -> [SyntaxForest Fun]
 applyProfileToForest (FNode name@(Name fun profile) children) 
    | isCoercion name = concat chForests
    | otherwise       = [ FNode fun chForests | not (null chForests) ]
    where chForests   = concat [ applyProfileM unifyManyForests profile forests |
 				 forests0 <- children,
 				 forests <- mapM applyProfileToForest forests0 ]
 applyProfileToForest (FString s) = [FString s]
 applyProfileToForest (FInt    n) = [FInt    n]
 applyProfileToForest (FFloat  f) = [FFloat  f]
 applyProfileToForest (FMeta)     = [FMeta]
 --------------------- From parsing types ------------------------------
 -- * fast nonerasing MCFG
 type FIndex   = Int
 type FPath    = [FIndex]
 type FName    = NameProfile CId
 type FGrammar = FCFGrammar FCat FName Token
 type FRule    = FCFRule    FCat FName Token
 data FCat     = FCat  {-# UNPACK #-} !Int CId [FPath] [(FPath,FIndex)]
 initialFCat :: CId -> FCat
 initialFCat cat = FCat 0 cat [] []
 fcatString = FCat (-1) (CId "String") [[0]] []
 fcatInt    = FCat (-2) (CId "Int")    [[0]] []
 fcatFloat  = FCat (-3) (CId "Float")  [[0]] []
 fcat2cid :: FCat -> CId
 fcat2cid (FCat _ c _ _) = c
 instance Eq FCat where
  (FCat id1 _ _ _) == (FCat id2 _ _ _) = id1 == id2
 instance Ord FCat where
  compare (FCat id1 _ _ _) (FCat id2 _ _ _) = compare id1 id2
 ---
 isCoercion :: Name -> Bool
 isCoercion (Name fun [Unify [0]]) = False -- isWildIdent fun
 isCoercion _ = False
 type Name = NameProfile Fun
--- a/src/GF/Canon/GFCC/GFCCAPI.hs
+++ b/src/GF/Canon/GFCC/GFCCAPI.hs
@@ -0,0 +1,115 @@
 ----------------------------------------------------------------------
 -- |
 -- Module      : GFCCAPI
 -- Maintainer  : Aarne Ranta
 -- Stability   : (stable)
 -- Portability : (portable)
 --
 -- > CVS $Date: 
 -- > CVS $Author: 
 -- > CVS $Revision: 
 --
 -- Reduced Application Programmer's Interface to GF, meant for
 -- embedded GF systems. AR 19/9/2007
 -----------------------------------------------------------------------------
 module  GF.Canon.GFCC.GFCCAPI where
 import GF.Canon.GFCC.DataGFCC
 --import GF.Canon.GFCC.GenGFCC
 import GF.Canon.GFCC.AbsGFCC
 import GF.Canon.GFCC.ParGFCC
 import GF.Canon.GFCC.PrintGFCC
 import GF.Canon.GFCC.ErrM
 --import GF.Data.Operations
 --import GF.Infra.UseIO
 import qualified Data.Map as Map
 import System.Random (newStdGen)
 import System.Directory (doesFileExist)
 import System
 -- This API is meant to be used when embedding GF grammars in Haskell 
 -- programs. The embedded system is supposed to use the
 -- .gfcm grammar format, which is first produced by the gf program.
 ---------------------------------------------------
 -- Interface
 ---------------------------------------------------
 type MultiGrammar = GFCC
 type Language     = String
 type Category     = String
 type Tree         = Exp
 file2grammar :: FilePath -> IO MultiGrammar
 linearize    :: MultiGrammar -> Language -> Tree -> String
 parse        :: MultiGrammar -> Language -> Category -> String -> [Tree]
 linearizeAll     :: MultiGrammar -> Tree -> [String]
 linearizeAllLang :: MultiGrammar -> Tree -> [(Language,String)]
 --parseAll     :: MultiGrammar -> Category -> String -> [[Tree]]
 --parseAllLang :: MultiGrammar -> Category -> String -> [(Language,[Tree])]
 readTree   :: MultiGrammar -> String -> Tree
 showTree   ::                 Tree -> String
 languages  :: MultiGrammar -> [Language]
 categories :: MultiGrammar -> [Category]
 startCat   :: MultiGrammar -> Category
 ---------------------------------------------------
 -- Implementation
 ---------------------------------------------------
 file2grammar f =
  readFileIf f >>= err (error "no parse") (return . mkGFCC) . pGrammar . myLexer
 linearize mgr lang = GF.Canon.GFCC.DataGFCC.linearize mgr (CId lang)
 parse mgr lang cat s = [] 
 {-
  map tree2exp . 
  errVal [] . 
  parseString (stateOptions sgr) sgr cfcat
 where
   sgr   = stateGrammarOfLang mgr (zIdent lang)
   cfcat = string2CFCat abs cat
   abs   = maybe (error "no abstract syntax") prIdent $ abstract mgr
 -}
 linearizeAll mgr = map snd . linearizeAllLang mgr
 linearizeAllLang mgr t = [(lang,linearThis mgr lang t) | lang <- languages mgr]
 {-
 parseAll mgr cat = map snd . parseAllLang mgr cat
 parseAllLang mgr cat s = 
  [(lang,ts) | lang <- languages mgr, let ts = parse mgr lang cat s, not (null ts)]
 -}
 readTree _ = err (const exp0) id . (pExp . myLexer)
 showTree t = printTree t
 languages mgr = [l | CId l <- cncnames mgr]
 categories mgr = [c | CId c <- Map.keys (cats (abstract mgr))]
 startCat mgr = "S" ----
 ------------ for internal use only
 linearThis = GF.Canon.GFCC.GFCCAPI.linearize
 err f g ex = case ex of
  Ok x -> g x
  Bad s -> f s
 readFileIf f = do
  b <- doesFileExist f
  if b then readFile f 
       else putStrLn ("file " ++ f ++ " not found") >> return ""
--- a/src/GF/Canon/GFCC/ParGFCC.hs
+++ b/src/GF/Canon/GFCC/ParGFCC.hs
@@ -448,14 +448,14 @@ happyReduction_42 happy_x_2
 	happy_x_1
 	 =  case happyOut24 happy_x_2 of { happy_var_2 -> 
 	happyIn36
-		 (V happy_var_2
+		 (V (fromInteger happy_var_2) --H
 	)}
 happyReduce_43 = happySpecReduce_1 13# happyReduction_43
 happyReduction_43 happy_x_1
 	 =  case happyOut24 happy_x_1 of { happy_var_1 -> 
 	happyIn36
-		 (C happy_var_1
+		 (C (fromInteger happy_var_1) --H
 	)}
 happyReduce_44 = happySpecReduce_1 13# happyReduction_44
--- a/src/GF/Canon/GFCC/Shell.hs
+++ b/src/GF/Canon/GFCC/Shell.hs
@@ -0,0 +1,62 @@
 module Main where
 import GF.Canon.GFCC.GFCCAPI
 import qualified GF.Canon.GFCC.GenGFCC as G ---
 import GF.Canon.GFCC.AbsGFCC (CId(CId)) ---
 import System.Random (newStdGen)
 import System (getArgs)
 -- Simple translation application built on GFCC. AR 7/9/2006 -- 19/9/2007
 main :: IO ()
 main = do
  file:_  <- getArgs
  grammar <- file2grammar file
  putStrLn $ "languages:  " ++ unwords (languages grammar)
  putStrLn $ "categories: " ++ unwords (categories grammar)
  loop grammar
 loop :: MultiGrammar -> IO ()
 loop grammar = do
  s <- getLine
  if s == "quit" then return () else do
    treat grammar s
    loop grammar
 treat :: MultiGrammar -> String -> IO ()
 treat grammar s = case words s of
  "gt":cat:n:_ -> do
    mapM_ prlinonly $ take (read n) $ G.generate grammar (CId cat)
  "gtt":cat:n:_ -> do
    mapM_ prlin $ take (read n) $ G.generate grammar (CId cat)
  "gr":cat:n:_ -> do
    gen <- newStdGen
    mapM_ prlinonly $ take (read n) $ G.generateRandom gen grammar (CId cat)
  "grt":cat:n:_ -> do
    gen <- newStdGen
    mapM_ prlin $ take (read n) $ G.generateRandom gen grammar (CId cat)
  "p":lang:cat:ws -> do
    let ts = parse grammar lang cat $ unwords ws
    mapM_ (putStrLn . showTree) ts 
  "search":cat:n:ws -> do
    case G.parse (read n) grammar (CId cat) ws of
      t:_ -> prlin t
      _ -> putStrLn "no parse found" 
  _ -> lins $ readTree grammar s
 where
  langs = languages grammar
  lins t = mapM_ (lint t) $ langs 
  lint t lang = do
 ----    putStrLn $ showTree $ linExp grammar lang t 
    lin t lang
  lin t lang = do
    putStrLn $ linearize grammar lang t
  prlins t = do
    putStrLn $ showTree t
    lins t
  prlin t = do
    putStrLn $ showTree t
    prlinonly t
  prlinonly t = mapM_ (lin t) $ langs
--- a/src/Makefile
+++ b/src/Makefile
@@ -192,7 +192,7 @@ tools/$(GF_DOC_EXE): tools/GFDoc.hs
 	$(GHMAKE) $(GHCOPTFLAGS) -o $@ $^
 gfcc:
-	$(GHMAKE) $(GHCOPTFLAGS) -o gfcc GF/Canon/GFCC/RunGFCC.hs
+	$(GHMAKE) $(GHCOPTFLAGS) -o gfcc GF/Canon/GFCC/Shell.hs
 	strip gfcc
 	mv gfcc ../bin/