1
0
forked from GitHub/gf-core

new GFCC format in GF/GFCC

This commit is contained in:
aarne
2007-10-04 21:38:59 +00:00
parent acc32ec199
commit 48ebf562b9
18 changed files with 2908 additions and 102 deletions

View File

@@ -3,16 +3,13 @@ module Main where
import GF.Devel.Compile
import GF.Devel.GrammarToGFCC
import GF.Devel.OptimizeGFCC
import GF.Canon.GFCC.CheckGFCC
import GF.Canon.GFCC.PrintGFCC
import GF.Canon.GFCC.DataGFCC
import GF.GFCC.CheckGFCC
import GF.GFCC.DataGFCC
import GF.Devel.UseIO
import GF.Infra.Option
---import GF.Devel.PrGrammar ---
import System
main = do
xx <- getArgs
let (opts,fs) = getOptions "-" xx
@@ -31,8 +28,7 @@ main = do
mapM_ (batchCompile opts) (map return fs)
putStrLn "Done."
check gc0 = do
let gfcc = mkGFCC gc0
check gfcc = do
(gc,b) <- checkGFCC gfcc
putStrLn $ if b then "OK" else "Corrupted GFCC"
return gc

View File

@@ -1,73 +0,0 @@
Grm. Grammar ::= Header ";" Abstract ";" [Concrete] ;
Hdr. Header ::= "grammar" CId "(" [CId] ")" ;
Abs. Abstract ::=
"abstract" "{"
"flags" [Flag]
"cat" [CatDef]
"fun" [FunDef]
"}" ;
Cnc. Concrete ::=
"concrete" CId "{"
"flags" [Flag]
"oper" [LinDef]
"lincat" [LinDef]
"lindef" [LinDef]
"lin" [LinDef]
"}" ;
Flg. Flag ::= CId "=" String ;
Cat. CatDef ::= CId [Hypo] ;
Fun. FunDef ::= CId ":" Type "=" Exp ;
Lin. LinDef ::= CId "=" Term ;
Hyp. Hypo ::= "(" CId ":" Type ")" ;
FTyp. Type ::= [CId] "->" CId ; -- simple type
DTyp. Type ::= "[" [Hypo] "->" Type "]" ; -- dep. product type
BTyp. Type ::= "(" CId [Exp] ")" ; -- dep. basic type
Tr. Exp ::= "(" Atom [Exp] ")" ; -- ordinary term
DTr. Exp ::= "[" "(" [CId] ")" Atom [Exp] "]" ; -- term with bindings
AC. Atom ::= CId ;
AS. Atom ::= String ;
AI. Atom ::= Integer ;
AF. Atom ::= Double ;
AM. Atom ::= "?" ;
trA. Exp ::= Atom ;
define trA a = Tr a [] ;
R. Term ::= "[" [Term] "]" ; -- record/table
P. Term ::= "(" Term "!" Term ")" ; -- projection/selection
S. Term ::= "(" [Term] ")" ; -- sequence with ++
K. Term ::= Tokn ; -- token
V. Term ::= "$" Integer ; -- argument
C. Term ::= Integer ; -- parameter value/label
F. Term ::= CId ; -- global constant
FV. Term ::= "[|" [Term] "|]" ; -- free variation
W. Term ::= "(" String "+" Term ")" ; -- prefix + suffix table
TM. Term ::= "?" ; -- lin of metavariable
KS. Tokn ::= String ;
KP. Tokn ::= "[" "pre" [String] "[" [Variant] "]" "]" ;
Var. Variant ::= [String] "/" [String] ;
terminator Concrete ";" ;
terminator Flag ";" ;
terminator CatDef ";" ;
terminator FunDef ";" ;
terminator LinDef ";" ;
terminator Hypo "" ;
separator CId "," ;
separator Term "," ;
terminator Exp "" ;
terminator String "" ;
separator Variant "," ;
token CId (('_' | letter) (letter | digit | '\'' | '_')*) ;

View File

@@ -3,8 +3,8 @@ module GF.Devel.GrammarToGFCC (prGrammar2gfcc,mkCanon2gfcc) where
import GF.Grammar.Grammar
import qualified GF.Grammar.Lookup as Look
import qualified GF.Canon.GFCC.AbsGFCC as C
import qualified GF.Canon.GFCC.PrintGFCC as Pr
import qualified GF.GFCC.AbsGFCC as C
import qualified GF.GFCC.DataGFCC as D
import qualified GF.Grammar.Abstract as A
import qualified GF.Grammar.Macros as GM
import qualified GF.Grammar.Compute as Compute
@@ -26,10 +26,10 @@ import Debug.Trace ----
-- the main function: generate GFCC from GF.
prGrammar2gfcc :: Options -> String -> SourceGrammar -> (String,String)
prGrammar2gfcc opts cnc gr = (abs, Pr.printTree gc) where
prGrammar2gfcc opts cnc gr = (abs, D.printGFCC gc) where
(abs,gc) = mkCanon2gfcc opts cnc gr
mkCanon2gfcc :: Options -> String -> SourceGrammar -> (String,C.Grammar)
mkCanon2gfcc :: Options -> String -> SourceGrammar -> (String,D.GFCC)
mkCanon2gfcc opts cnc gr =
(prIdent abs, (canon2gfcc opts . reorder abs . utf8Conv . canon2canon abs) gr)
where
@@ -38,27 +38,39 @@ mkCanon2gfcc opts cnc gr =
-- Generate GFCC from GFCM.
-- this assumes a grammar translated by canon2canon
canon2gfcc :: Options -> SourceGrammar -> C.Grammar
canon2gfcc :: Options -> SourceGrammar -> D.GFCC
canon2gfcc opts cgr@(M.MGrammar ((a,M.ModMod abm):cms)) =
(if (oElem (iOpt "show_canon") opts) then trace (prGrammar cgr) else id) $
C.Grm (C.Hdr (i2i a) cs) (C.Abs adefs) cncs
D.GFCC an cns abs cncs
where
cs = map (i2i . fst) cms
adefs = [C.Fun f' (mkType ty) (C.Tr (C.AC f') []) |
(f,AbsFun (Yes ty) _) <- tree2list (M.jments abm), let f' = i2i f]
cncs = [C.Cnc (i2i lang) (concr m) | (lang,M.ModMod m) <- cms]
concr mo = cats mo ++ lindefs mo ++
[C.Lin (i2i f) (mkTerm tr) |
(f,CncFun _ (Yes tr) _) <- tree2list (M.jments mo)]
cats mo = [C.Lin (i2ic c) (mkCType ty) |
(c,CncCat (Yes ty) _ _) <- tree2list (M.jments mo)]
lindefs mo = [C.Lin (i2id c) (mkTerm tr) |
(c,CncCat _ (Yes tr) _) <- tree2list (M.jments mo)]
an = (i2i a)
cns = map (i2i . fst) cms
abs = D.Abstr aflags funs cats catfuns
aflags = Map.fromAscList [] ---- flags
lfuns = [(f', (mkType ty,C.Tr (C.AC f') [])) | ---- defs
(f,AbsFun (Yes ty) _) <- tree2list (M.jments abm), let f' = i2i f]
funs = Map.fromAscList lfuns
lcats = [(i2i c,[]) | ---- context
(c,AbsCat _ _) <- tree2list (M.jments abm)]
cats = Map.fromAscList lcats
catfuns = Map.fromAscList
[(cat,[f | (f, (C.Typ _ c,_)) <- lfuns, c==cat]) | (cat,_) <- lcats]
cncs = Map.fromList [mkConcr (i2i lang) mo | (lang,M.ModMod mo) <- cms]
mkConcr lang mo = (lang,D.Concr flags lins opers lincats lindefs printnames)
where
flags = Map.fromAscList [] ---- flags
opers = Map.fromAscList [] -- opers will be created as optimization
lins = Map.fromAscList
[(i2i f, mkTerm tr) | (f,CncFun _ (Yes tr) _) <- tree2list (M.jments mo)]
lincats = Map.fromAscList
[(i2i c, mkCType ty) | (c,CncCat (Yes ty) _ _) <- tree2list (M.jments mo)]
lindefs = Map.fromAscList
[(i2i c, mkTerm tr) | (c,CncCat _ (Yes tr) _) <- tree2list (M.jments mo)]
printnames = Map.fromAscList [] ---- printnames
i2i :: Ident -> C.CId
i2i (IC c) = C.CId c
i2ic (IC c) = C.CId ("__" ++ c) -- for lincat of category symbols
i2id (IC c) = C.CId ("_d" ++ c) -- for lindef of category symbols
mkType :: A.Type -> C.Type
mkType t = case GM.catSkeleton t of

View File

@@ -1,41 +1,36 @@
module GF.Devel.OptimizeGFCC where
import qualified GF.Canon.GFCC.AbsGFCC as C
import qualified GF.Canon.GFCC.DataGFCC as D
import qualified GF.Canon.GFCC.PrintGFCC as Pr
import GF.GFCC.AbsGFCC
import GF.GFCC.DataGFCC
import qualified GF.Infra.Option as O
import GF.Infra.Option
import GF.Data.Operations
import Data.List
import Data.Char (isDigit)
import qualified Data.Map as Map
import Debug.Trace ----
-- back-end optimization:
-- suffix analysis followed by common subexpression elimination
optGFCC :: D.GFCC -> D.GFCC
optGFCC :: GFCC -> GFCC
optGFCC gfcc = gfcc {
D.concretes =
Map.fromAscList
[(lang, (opt cnc)) | (lang,cnc) <- Map.assocs (D.concretes gfcc)]
concretes = Map.map opt (concretes gfcc)
}
where
opt cnc = Map.fromAscList $ subex [(f,optTerm t) | (f,t) <- Map.assocs cnc]
opt cnc = subex $ cnc {
lins = Map.map optTerm (lins cnc),
lindefs = Map.map optTerm (lindefs cnc),
printnames = Map.map optTerm (printnames cnc)
}
-- analyse word form lists into prefix + suffixes
-- suffix sets can later be shared by subex elim
optTerm :: C.Term -> C.Term
optTerm :: Term -> Term
optTerm tr = case tr of
C.R ts@(_:_:_) | all isK ts -> mkSuff $ optToks [s | C.K (C.KS s) <- ts]
C.R ts -> C.R $ map optTerm ts
C.P t v -> C.P (optTerm t) v
C.L x t -> C.L x (optTerm t)
R ts@(_:_:_) | all isK ts -> mkSuff $ optToks [s | K (KS s) <- ts]
R ts -> R $ map optTerm ts
P t v -> P (optTerm t) v
_ -> tr
where
optToks ss = prf : suffs where
@@ -45,67 +40,67 @@ optTerm tr = case tr of
s1:ss2 -> if isPrefixOf cand s1 then pref cand ss2 else pref (init cand) ss
_ -> cand
isK t = case t of
C.K (C.KS _) -> True
K (KS _) -> True
_ -> False
mkSuff ("":ws) = C.R (map (C.K . C.KS) ws)
mkSuff (p:ws) = C.W p (C.R (map (C.K . C.KS) ws))
mkSuff ("":ws) = R (map (K . KS) ws)
mkSuff (p:ws) = W p (R (map (K . KS) ws))
-- common subexpression elimination; see ./Subexpression.hs for the idea
-- common subexpression elimination
subex :: [(C.CId,C.Term)] -> [(C.CId,C.Term)]
subex js = errVal js $ do
(tree,_) <- appSTM (getSubtermsMod js) (Map.empty,0)
return $ addSubexpConsts tree js
---subex :: [(CId,Term)] -> [(CId,Term)]
subex :: Concr -> Concr
subex cnc = errVal cnc $ do
(tree,_) <- appSTM (getSubtermsMod cnc) (Map.empty,0)
return $ addSubexpConsts tree cnc
type TermList = Map.Map C.Term (Int,Int) -- number of occs, id
type TermList = Map.Map Term (Int,Int) -- number of occs, id
type TermM a = STM (TermList,Int) a
addSubexpConsts :: TermList -> [(C.CId,C.Term)] -> [(C.CId,C.Term)]
addSubexpConsts tree lins =
let opers = sortBy (\ (f,_) (g,_) -> compare f g)
[(fid id, trm) | (trm,(_,id)) <- list]
in map mkOne $ opers ++ lins
addSubexpConsts :: TermList -> Concr -> Concr
addSubexpConsts tree cnc = cnc {
opers = Map.fromList [(f,recomp f trm) | (f,trm) <- ops],
lins = rec lins,
lindefs = rec lindefs,
printnames = rec printnames
}
where
ops = [(fid id, trm) | (trm,(_,id)) <- Map.assocs tree]
mkOne (f,trm) = (f, recomp f trm)
recomp f t = case Map.lookup t tree of
Just (_,id) | fid id /= f -> C.F $ fid id -- not to replace oper itself
Just (_,id) | fid id /= f -> F $ fid id -- not to replace oper itself
_ -> case t of
C.R ts -> C.R $ map (recomp f) ts
C.S ts -> C.S $ map (recomp f) ts
C.W s t -> C.W s (recomp f t)
C.P t p -> C.P (recomp f t) (recomp f p)
C.RP t p -> C.RP (recomp f t) (recomp f p)
C.L x t -> C.L x (recomp f t)
R ts -> R $ map (recomp f) ts
S ts -> S $ map (recomp f) ts
W s t -> W s (recomp f t)
P t p -> P (recomp f t) (recomp f p)
_ -> t
fid n = C.CId $ "_" ++ show n
list = Map.toList tree
fid n = CId $ "_" ++ show n
rec field = Map.fromAscList [(f,recomp f trm) | (f,trm) <- Map.assocs (field cnc)]
getSubtermsMod :: [(C.CId,C.Term)] -> TermM TermList
getSubtermsMod js = do
mapM (getInfo collectSubterms) js
getSubtermsMod :: Concr -> TermM TermList
getSubtermsMod cnc = do
mapM getSubterms (Map.assocs (lins cnc))
mapM getSubterms (Map.assocs (lindefs cnc))
mapM getSubterms (Map.assocs (printnames cnc))
(tree0,_) <- readSTM
return $ Map.filter (\ (nu,_) -> nu > 1) tree0
where
getInfo get (f,trm) = do
get trm
return ()
getSubterms (f,trm) = collectSubterms trm >> return ()
collectSubterms :: C.Term -> TermM ()
collectSubterms :: Term -> TermM ()
collectSubterms t = case t of
C.R ts -> do
R ts -> do
mapM collectSubterms ts
add t
C.RP u v -> do
collectSubterms v
add t
C.S ts -> do
S ts -> do
mapM collectSubterms ts
add t
C.W s u -> do
W s u -> do
collectSubterms u
add t
C.P p u -> do
P p u -> do
collectSubterms p
collectSubterms u
add t

67
src/GF/Devel/Shell.hs Normal file
View File

@@ -0,0 +1,67 @@
module Main where
import GF.GFCC.API
import System.Random (newStdGen)
import System (getArgs)
import Data.Char (isDigit)
-- Simple translation application built on GFCC. AR 7/9/2006 -- 19/9/2007
main :: IO ()
main = do
file:_ <- getArgs
grammar <- file2grammar file
printHelp grammar
loop grammar
loop :: MultiGrammar -> IO ()
loop grammar = do
s <- getLine
if s == "q" then return () else do
treat grammar s
loop grammar
printHelp grammar = do
putStrLn $ "languages: " ++ unwords (languages grammar)
putStrLn $ "categories: " ++ unwords (categories grammar)
putStrLn commands
commands = unlines [
"Commands:",
" (gt | gtt | gr | grt) Cat Num - generate all or random",
" p Lang Cat String - parse (unquoted) string",
" l Tree - linearize in all languages",
" h - help",
" q - quit"
]
treat :: MultiGrammar -> String -> IO ()
treat mgr s = case words s of
"gt" :cat:n:_ -> mapM_ prlinonly $ take (read1 n) $ generateAll mgr cat
"gtt":cat:n:_ -> mapM_ prlin $ take (read1 n) $ generateAll mgr cat
"gr" :cat:n:_ -> generateRandom mgr cat >>= mapM_ prlinonly . take (read1 n)
"grt":cat:n:_ -> generateRandom mgr cat >>= mapM_ prlin . take (read1 n)
"p":lang:cat:ws -> do
let ts = parse mgr lang cat $ unwords ws
mapM_ (putStrLn . showTree) ts
"h":_ -> printHelp mgr
_ -> lins $ readTree mgr s
where
grammar = gfcc mgr
langs = languages mgr
lins t = mapM_ (lint t) $ langs
lint t lang = do
---- putStrLn $ showTree $ linExp grammar lang t
lin t lang
lin t lang = do
putStrLn $ linearize mgr lang t
prlins t = do
putStrLn $ showTree t
lins t
prlin t = do
putStrLn $ showTree t
prlinonly t
prlinonly t = mapM_ (lin t) $ langs
read1 s = if all isDigit s then read s else 1