1
0
forked from GitHub/gf-core

removed Canon/GFCC

This commit is contained in:
aarne
2007-10-05 13:38:10 +00:00
parent 074ef6e09f
commit a41b07f9c9
21 changed files with 0 additions and 2755 deletions

View File

@@ -1,70 +0,0 @@
module GF.Canon.GFCC.AbsGFCC where
-- Haskell module generated by the BNF converter
newtype CId = CId String deriving (Eq,Ord,Show)
data Grammar =
Grm Header Abstract [Concrete]
deriving (Eq,Ord,Show)
data Header =
Hdr CId [CId]
deriving (Eq,Ord,Show)
data Abstract =
Abs [AbsDef]
deriving (Eq,Ord,Show)
data Concrete =
Cnc CId [CncDef]
deriving (Eq,Ord,Show)
data AbsDef =
Fun CId Type Exp
deriving (Eq,Ord,Show)
data CncDef =
Lin CId Term
deriving (Eq,Ord,Show)
data Type =
Typ [CId] CId
deriving (Eq,Ord,Show)
data Exp =
Tr Atom [Exp]
deriving (Eq,Ord,Show)
data Atom =
AC CId
| AS String
| AI Integer
| AF Double
| AM
deriving (Eq,Ord,Show)
data Term =
R [Term]
| P Term Term
| S [Term]
| K Tokn
| V Int
| C Int
| F CId
| FV [Term]
| W String Term
| RP Term Term
| TM
| L CId Term
| BV CId
deriving (Eq,Ord,Show)
data Tokn =
KS String
| KP [String] [Variant]
deriving (Eq,Ord,Show)
data Variant =
Var [String] [String]
deriving (Eq,Ord,Show)

View File

@@ -1,170 +0,0 @@
module GF.Canon.GFCC.CheckGFCC where
import GF.Canon.GFCC.DataGFCC
import GF.Canon.GFCC.AbsGFCC
import GF.Canon.GFCC.PrintGFCC
import GF.Canon.GFCC.ErrM
import qualified Data.Map as Map
import Control.Monad
andMapM :: Monad m => (a -> m Bool) -> [a] -> m Bool
andMapM f xs = mapM f xs >>= return . and
labelBoolIO :: String -> IO (x,Bool) -> IO (x,Bool)
labelBoolIO msg iob = do
(x,b) <- iob
if b then return (x,b) else (putStrLn msg >> return (x,b))
checkGFCC :: GFCC -> IO (GFCC,Bool)
checkGFCC gfcc = do
(cs,bs) <- mapM (checkConcrete gfcc)
(Map.assocs (concretes gfcc)) >>= return . unzip
return (gfcc {concretes = Map.fromAscList cs}, and bs)
checkConcrete :: GFCC -> (CId,Concr) -> IO ((CId,Concr),Bool)
checkConcrete gfcc (lang,cnc) =
labelBoolIO ("happened in language " ++ printTree lang) $ do
(rs,bs) <- mapM checkl (Map.assocs cnc) >>= return . unzip
return ((lang,Map.fromAscList rs),and bs)
where
checkl r@(CId f,_) = case head f of
'_' -> return (r,True)
_ -> checkLin gfcc lang r
checkLin :: GFCC -> CId -> (CId,Term) -> IO ((CId,Term),Bool)
checkLin gfcc lang (f,t) =
labelBoolIO ("happened in function " ++ printTree f) $ do
(t',b) <- checkTerm (lintype gfcc lang f) t --- $ inline gfcc lang t
return ((f,t'),b)
inferTerm :: [Tpe] -> Term -> Err (Term,Tpe)
inferTerm args trm = case trm of
K _ -> returnt str
C i -> returnt $ ints i
V i -> do
testErr (i < length args) ("too large index " ++ show i)
returnt $ args !! i
S ts -> do
(ts',tys) <- mapM infer ts >>= return . unzip
let tys' = filter (/=str) tys
testErr (null tys')
("expected Str in " ++ prt trm ++ " not " ++ unwords (map prt tys'))
return (S ts',str)
R ts -> do
(ts',tys) <- mapM infer ts >>= return . unzip
return $ (R ts',tuple tys)
P t u -> do
(t',tt) <- infer t
(u',tu) <- infer u
case tt of
R tys -> case tu of
R vs -> infer $ foldl P t' [P u' (C i) | i <- [0 .. length vs - 1]]
--- R [v] -> infer $ P t v
--- R (v:vs) -> infer $ P (head tys) (R vs)
C i -> do
testErr (i < length tys)
("required more than " ++ show i ++ " fields in " ++ prt (R tys))
return (P t' u', tys !! i) -- record: index must be known
_ -> do
let typ = head tys
testErr (all (==typ) tys) ("different types in table " ++ prt trm)
return (P t' u', typ) -- table: types must be same
_ -> Bad $ "projection from " ++ prt t ++ " : " ++ prt tt
FV [] -> returnt str ----
FV (t:ts) -> do
(t',ty) <- infer t
(ts',tys) <- mapM infer ts >>= return . unzip
testErr (all (==ty) tys) ("different types in variants " ++ prt trm)
return (FV (t':ts'),ty)
W s r -> infer r
_ -> Bad ("no type inference for " ++ prt trm)
where
returnt ty = return (trm,ty)
infer = inferTerm args
prt = printTree
checkTerm :: LinType -> Term -> IO (Term,Bool)
checkTerm (args,val) trm = case inferTerm args trm of
Ok (t,ty) -> if eqType ty val
then return (t,True)
else do
putStrLn $ "term: " ++ printTree trm ++
"\nexpected type: " ++ printTree val ++
"\ninferred type: " ++ printTree ty
return (t,False)
Bad s -> do
putStrLn s
return (trm,False)
eqType :: Tpe -> Tpe -> Bool
eqType inf exp = case (inf,exp) of
(C k, C n) -> k <= n -- only run-time corr.
(R rs,R ts) -> length rs == length ts && and [eqType r t | (r,t) <- zip rs ts]
_ -> inf == exp
-- should be in a generic module, but not in the run-time DataGFCC
type Tpe = Term
type LinType = ([Tpe],Tpe)
tuple :: [Tpe] -> Tpe
tuple = R
ints :: Int -> Tpe
ints = C
str :: Tpe
str = S []
lintype :: GFCC -> CId -> CId -> LinType
lintype gfcc lang fun = case lookType gfcc fun of
Typ cs c -> (map linc cs, linc c)
where
linc = lookLincat gfcc lang
lookLincat :: GFCC -> CId -> CId -> Term
lookLincat gfcc lang (CId cat) = lookLin gfcc lang (CId ("__" ++ cat))
linRules :: Map.Map CId Term -> [(CId,Term)]
linRules cnc = [(f,t) | (f@(CId (c:_)),t) <- Map.assocs cnc, c /= '_'] ----
inline :: GFCC -> CId -> Term -> Term
inline gfcc lang t = case t of
F c -> inl $ look c
_ -> composSafeOp inl t
where
inl = inline gfcc lang
look = lookLin gfcc lang
composOp :: Monad m => (Term -> m Term) -> Term -> m Term
composOp f trm = case trm of
R ts -> liftM R $ mapM f ts
S ts -> liftM S $ mapM f ts
FV ts -> liftM FV $ mapM f ts
P t u -> liftM2 P (f t) (f u)
W s t -> liftM (W s) $ f t
_ -> return trm
composSafeOp :: (Term -> Term) -> Term -> Term
composSafeOp f = maybe undefined id . composOp (return . f)
-- from GF.Data.Oper
maybeErr :: String -> Maybe a -> Err a
maybeErr s = maybe (Bad s) Ok
testErr :: Bool -> String -> Err ()
testErr cond msg = if cond then return () else Bad msg
errVal :: a -> Err a -> a
errVal a = err (const a) id
errIn :: String -> Err a -> Err a
errIn msg = err (\s -> Bad (s ++ "\nOCCURRED IN\n" ++ msg)) return
err :: (String -> b) -> (a -> b) -> Err a -> b
err d f e = case e of
Ok a -> f a
Bad s -> d s

View File

@@ -1,148 +0,0 @@
module GF.Canon.GFCC.DataGFCC where
import GF.Canon.GFCC.AbsGFCC
import GF.Canon.GFCC.PrintGFCC
import Data.Map
import Data.List
import Debug.Trace ----
data GFCC = GFCC {
absname :: CId ,
cncnames :: [CId] ,
abstract :: Abstr ,
concretes :: Map CId Concr
}
-- redundant double representation for fast lookup
data Abstr = Abstr {
funs :: Map CId Type, -- find the type of a fun
cats :: Map CId [CId] -- find the funs giving a cat
}
statGFCC :: GFCC -> String
statGFCC gfcc = unlines [
"Abstract\t" ++ pr (absname gfcc),
"Concretes\t" ++ unwords (Prelude.map pr (cncnames gfcc)),
"Categories\t" ++ unwords (Prelude.map pr (keys (cats (abstract gfcc))))
]
where pr (CId s) = s
type Concr = Map CId Term
lookMap :: (Show i, Ord i) => a -> i -> Map i a -> a
lookMap d c m = maybe d id $ Data.Map.lookup c m
lookLin :: GFCC -> CId -> CId -> Term
lookLin mcfg lang fun =
lookMap TM fun $ lookMap undefined lang $ concretes mcfg
-- | Look up the type of a function.
lookType :: GFCC -> CId -> Type
lookType gfcc f = lookMap (error $ "lookType " ++ show f) f (funs (abstract gfcc))
linearize :: GFCC -> CId -> Exp -> String
linearize mcfg lang = realize . linExp mcfg lang
realize :: Term -> String
realize trm = case trm of
R ts -> realize (ts !! 0)
S ss -> unwords $ Prelude.map realize ss
K t -> case t of
KS s -> s
KP s _ -> unwords s ---- prefix choice TODO
W s t -> s ++ realize t
FV ts -> realize (ts !! 0) ---- other variants TODO
RP _ r -> realize r
TM -> "?"
_ -> "ERROR " ++ show trm ---- debug
linExp :: GFCC -> CId -> Exp -> Term
linExp mcfg lang tree@(Tr at trees) =
case at of
AC fun -> comp (Prelude.map lin trees) $ look fun
AS s -> R [kks (show s)] -- quoted
AI i -> R [kks (show i)]
AF d -> R [kks (show d)]
AM -> TM
where
lin = linExp mcfg lang
comp = compute mcfg lang
look = lookLin mcfg lang
exp0 :: Exp
exp0 = Tr (AS "NO_PARSE") []
term0 :: CId -> Term
term0 (CId s) = R [kks ("#" ++ s ++ "#")]
kks :: String -> Term
kks = K . KS
compute :: GFCC -> CId -> [Term] -> Term -> Term
compute mcfg lang args = comp where
comp trm = case trm of
P r p -> proj (comp r) (comp p)
RP i t -> RP (comp i) (comp t)
W s t -> W s (comp t)
R ts -> R $ Prelude.map comp ts
V i -> idx args i -- already computed
F c -> comp $ look c -- not computed (if contains argvar)
FV ts -> FV $ Prelude.map comp ts
S ts -> S $ Prelude.filter (/= S []) $ Prelude.map comp ts
_ -> trm
look = lookLin mcfg lang
idx xs i = if i > length xs - 1
then trace
("too large " ++ show i ++ " for\n" ++ unlines (Prelude.map prt xs) ++ "\n") TM
else xs !! i
proj r p = case (r,p) of
(_, FV ts) -> FV $ Prelude.map (proj r) ts
(FV ts, _ ) -> FV $ Prelude.map (\t -> proj t r) ts
(W s t, _) -> kks (s ++ getString (proj t p))
(_,R is) -> trace ("projection " ++ show p ++ "\n") $ comp $ foldl P r is
_ -> comp $ getField r (getIndex p)
getString t = case t of
K (KS s) -> s
_ -> trace ("ERROR in grammar compiler: string from "++ show t) "ERR"
getIndex t = case t of
C i -> i
RP p _ -> getIndex p
TM -> 0 -- default value for parameter
_ -> trace ("ERROR in grammar compiler: index from " ++ show t) 0
getField t i = case t of
R rs -> idx rs i
RP _ r -> getField r i
TM -> TM
_ -> trace ("ERROR in grammar compiler: field from " ++ show t) t
prt = printTree
mkGFCC :: Grammar -> GFCC
mkGFCC (Grm (Hdr a cs) ab@(Abs funs) ccs) = GFCC {
absname = a,
cncnames = cs,
abstract =
let
fs = fromAscList [(fun,typ) | Fun fun typ _ <- funs]
cats = sort $ nub [c | Fun f (Typ _ c) _ <- funs]
cs = fromAscList
[(cat,[f | Fun f (Typ _ c) _ <- funs, c==cat]) | cat <- cats]
in Abstr fs cs,
concretes = fromAscList [(lang, mkCnc lins) | Cnc lang lins <- ccs]
}
where
mkCnc lins = fromList [(fun,lin) | Lin fun lin <- lins] ---- Asc
printGFCC :: GFCC -> String
printGFCC gfcc = printTree $ Grm
(Hdr (absname gfcc) (cncnames gfcc))
(Abs [Fun f ty (Tr (AC f) []) | (f,ty) <- assocs (funs (abstract gfcc))])
[Cnc lang [Lin f t | (f,t) <- assocs lins] |
(lang,lins) <- assocs (concretes gfcc)]

View File

@@ -1,16 +0,0 @@
-- BNF Converter: Error Monad
-- Copyright (C) 2004 Author: Aarne Ranta
-- This file comes with NO WARRANTY and may be used FOR ANY PURPOSE.
module GF.Canon.GFCC.ErrM where
-- the Error monad: like Maybe type with error msgs
data Err a = Ok a | Bad String
deriving (Read, Show, Eq)
instance Monad Err where
return = Ok
fail = Bad
Ok a >>= f = f a
Bad s >>= f = Bad s

View File

@@ -1,50 +0,0 @@
Grm. Grammar ::= Header ";" Abstract ";" [Concrete] ;
Hdr. Header ::= "grammar" CId "(" [CId] ")" ;
Abs. Abstract ::= "abstract" "{" [AbsDef] "}" ;
Cnc. Concrete ::= "concrete" CId "{" [CncDef] "}" ;
Fun. AbsDef ::= CId ":" Type "=" Exp ;
--AFl. AbsDef ::= "%" CId "=" String ; -- flag
Lin. CncDef ::= CId "=" Term ;
--CFl. CncDef ::= "%" CId "=" String ; -- flag
Typ. Type ::= [CId] "->" CId ;
Tr. Exp ::= "(" Atom [Exp] ")" ;
AC. Atom ::= CId ;
AS. Atom ::= String ;
AI. Atom ::= Integer ;
AF. Atom ::= Double ;
AM. Atom ::= "?" ;
trA. Exp ::= Atom ;
define trA a = Tr a [] ;
R. Term ::= "[" [Term] "]" ; -- record/table
P. Term ::= "(" Term "!" Term ")" ; -- projection/selection
S. Term ::= "(" [Term] ")" ; -- sequence with ++
K. Term ::= Tokn ; -- token
V. Term ::= "$" Integer ; -- argument
C. Term ::= Integer ; -- parameter value/label
F. Term ::= CId ; -- global constant
FV. Term ::= "[|" [Term] "|]" ; -- free variation
W. Term ::= "(" String "+" Term ")" ; -- prefix + suffix table
RP. Term ::= "(" Term "@" Term ")"; -- record parameter alias
TM. Term ::= "?" ; -- lin of metavariable
L. Term ::= "(" CId "->" Term ")" ; -- lambda abstracted table
BV. Term ::= "#" CId ; -- lambda-bound variable
KS. Tokn ::= String ;
KP. Tokn ::= "[" "pre" [String] "[" [Variant] "]" "]" ;
Var. Variant ::= [String] "/" [String] ;
terminator Concrete ";" ;
terminator AbsDef ";" ;
terminator CncDef ";" ;
separator CId "," ;
separator Term "," ;
terminator Exp "" ;
terminator String "" ;
separator Variant "," ;
token CId (('_' | letter) (letter | digit | '\'' | '_')*) ;

View File

@@ -1,127 +0,0 @@
----------------------------------------------------------------------
-- |
-- Module : GFCCAPI
-- Maintainer : Aarne Ranta
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date:
-- > CVS $Author:
-- > CVS $Revision:
--
-- Reduced Application Programmer's Interface to GF, meant for
-- embedded GF systems. AR 19/9/2007
-----------------------------------------------------------------------------
module GF.Canon.GFCC.GFCCAPI where
import GF.Canon.GFCC.DataGFCC
--import GF.Canon.GFCC.GenGFCC
import GF.Canon.GFCC.AbsGFCC
import GF.Canon.GFCC.ParGFCC
import GF.Canon.GFCC.PrintGFCC
import GF.Canon.GFCC.ErrM
import GF.Parsing.FCFG
import qualified GF.Canon.GFCC.GenGFCC as G
import GF.Conversion.SimpleToFCFG (convertGrammar,FCat(..))
--import GF.Data.Operations
--import GF.Infra.UseIO
import qualified Data.Map as Map
import System.Random (newStdGen)
import System.Directory (doesFileExist)
-- This API is meant to be used when embedding GF grammars in Haskell
-- programs. The embedded system is supposed to use the
-- .gfcm grammar format, which is first produced by the gf program.
---------------------------------------------------
-- Interface
---------------------------------------------------
data MultiGrammar = MultiGrammar {gfcc :: GFCC, parsers :: [(Language,FCFPInfo)]}
type Language = String
type Category = String
type Tree = Exp
file2grammar :: FilePath -> IO MultiGrammar
linearize :: MultiGrammar -> Language -> Tree -> String
parse :: MultiGrammar -> Language -> Category -> String -> [Tree]
linearizeAll :: MultiGrammar -> Tree -> [String]
linearizeAllLang :: MultiGrammar -> Tree -> [(Language,String)]
parseAll :: MultiGrammar -> Category -> String -> [[Tree]]
parseAllLang :: MultiGrammar -> Category -> String -> [(Language,[Tree])]
generateAll :: MultiGrammar -> Category -> [Tree]
generateRandom :: MultiGrammar -> Category -> IO [Tree]
readTree :: MultiGrammar -> String -> Tree
showTree :: Tree -> String
languages :: MultiGrammar -> [Language]
categories :: MultiGrammar -> [Category]
startCat :: MultiGrammar -> Category
---------------------------------------------------
-- Implementation
---------------------------------------------------
file2grammar f = do
gfcc <- file2gfcc f
let fcfgs = convertGrammar gfcc
return (MultiGrammar gfcc [(lang, buildFCFPInfo fcfg) | (CId lang,fcfg) <- fcfgs])
file2gfcc f =
readFileIf f >>= err (error "no parse") (return . mkGFCC) . pGrammar . myLexer
linearize mgr lang = GF.Canon.GFCC.DataGFCC.linearize (gfcc mgr) (CId lang)
parse mgr lang cat s =
case lookup lang (parsers mgr) of
Nothing -> error "no parser"
Just pinfo -> case parseFCF "bottomup" pinfo (CId cat) (words s) of
Ok x -> x
Bad s -> error s
linearizeAll mgr = map snd . linearizeAllLang mgr
linearizeAllLang mgr t =
[(lang,linearThis mgr lang t) | lang <- languages mgr]
parseAll mgr cat = map snd . parseAllLang mgr cat
parseAllLang mgr cat s =
[(lang,ts) | lang <- languages mgr, let ts = parse mgr lang cat s, not (null ts)]
generateRandom mgr cat = do
gen <- newStdGen
return $ G.generateRandom gen (gfcc mgr) (CId cat)
generateAll mgr cat = G.generate (gfcc mgr) (CId cat)
readTree _ = err (const exp0) id . (pExp . myLexer)
showTree t = printTree t
languages mgr = [l | CId l <- cncnames (gfcc mgr)]
categories mgr = [c | CId c <- Map.keys (cats (abstract (gfcc mgr)))]
startCat mgr = "S" ----
------------ for internal use only
linearThis = GF.Canon.GFCC.GFCCAPI.linearize
err f g ex = case ex of
Ok x -> g x
Bad s -> f s
readFileIf f = do
b <- doesFileExist f
if b then readFile f
else putStrLn ("file " ++ f ++ " not found") >> return ""

View File

@@ -1,212 +0,0 @@
----------------------------------------------------------------------
-- |
-- Module : GrammarToHaskell
-- Maintainer : Aarne Ranta
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/06/17 12:39:07 $
-- > CVS $Author: bringert $
-- > CVS $Revision: 1.8 $
--
-- to write a GF abstract grammar into a Haskell module with translations from
-- data objects into GF trees. Example: GSyntax for Agda.
-- AR 11/11/1999 -- 7/12/2000 -- 18/5/2004
-----------------------------------------------------------------------------
module GF.Canon.GFCC.GFCCToHaskell (grammar2haskell, grammar2haskellGADT) where
import GF.Canon.GFCC.AbsGFCC
import GF.Canon.GFCC.DataGFCC
import GF.Data.Operations
import Data.List --(isPrefixOf, find, intersperse)
import qualified Data.Map as Map
-- | the main function
grammar2haskell :: GFCC -> String
grammar2haskell gr = foldr (++++) [] $
haskPreamble ++ [datatypes gr', gfinstances gr', fginstances gr']
where gr' = hSkeleton gr
grammar2haskellGADT :: GFCC -> String
grammar2haskellGADT gr = foldr (++++) [] $
["{-# OPTIONS_GHC -fglasgow-exts #-}"] ++
haskPreamble ++ [datatypesGADT gr', gfinstances gr', fginstances gr']
where gr' = hSkeleton gr
-- | by this you can prefix all identifiers with stg; the default is 'G'
gId :: OIdent -> OIdent
gId i = 'G':i
haskPreamble =
[
"module GSyntax where",
"",
"import GF.Canon.GFCC.AbsGFCC",
"import GF.Canon.GFCC.DataGFCC",
"import GF.Data.Operations",
"----------------------------------------------------",
"-- automatic translation from GF to Haskell",
"----------------------------------------------------",
"",
"class Gf a where gf :: a -> Exp",
"class Fg a where fg :: Exp -> a",
"",
predefInst "GString" "String" "Tr (AS s) []",
"",
predefInst "GInt" "Integer" "Tr (AI s) []",
"",
predefInst "GFloat" "Double" "Tr (AF s) []",
"",
"----------------------------------------------------",
"-- below this line machine-generated",
"----------------------------------------------------",
""
]
predefInst gtyp typ patt =
"newtype" +++ gtyp +++ "=" +++ gtyp +++ typ +++ " deriving Show" +++++
"instance Gf" +++ gtyp +++ "where" ++++
" gf (" ++ gtyp +++ "s) =" +++ patt +++++
"instance Fg" +++ gtyp +++ "where" ++++
" fg t =" ++++
" case t of" ++++
" " +++ patt +++ " ->" +++ gtyp +++ "s" ++++
" _ -> error (\"no" +++ gtyp +++ "\" ++ show t)"
type OIdent = String
type HSkeleton = [(OIdent, [(OIdent, [OIdent])])]
datatypes, gfinstances, fginstances :: (String,HSkeleton) -> String
datatypes = (foldr (+++++) "") . (filter (/="")) . (map hDatatype) . snd
gfinstances (m,g) = (foldr (+++++) "") $ (filter (/="")) $ (map (hInstance m)) g
fginstances (m,g) = (foldr (+++++) "") $ (filter (/="")) $ (map (fInstance m)) g
hDatatype :: (OIdent, [(OIdent, [OIdent])]) -> String
hInstance, fInstance :: String -> (OIdent, [(OIdent, [OIdent])]) -> String
hDatatype ("Cn",_) = "" ---
hDatatype (cat,[]) = ""
hDatatype (cat,rules) | isListCat (cat,rules) =
"newtype" +++ gId cat +++ "=" +++ gId cat +++ "[" ++ gId (elemCat cat) ++ "]"
+++ "deriving Show"
hDatatype (cat,rules) =
"data" +++ gId cat +++ "=" ++
(if length rules == 1 then "" else "\n ") +++
foldr1 (\x y -> x ++ "\n |" +++ y)
[gId f +++ foldr (+++) "" (map gId xx) | (f,xx) <- rules] ++++
" deriving Show"
-- GADT version of data types
datatypesGADT :: (String,HSkeleton) -> String
datatypesGADT (_,skel) =
unlines (concatMap hCatTypeGADT skel)
+++++
"data Tree :: * -> * where" ++++ unlines (concatMap (map (" "++) . hDatatypeGADT) skel)
hCatTypeGADT :: (OIdent, [(OIdent, [OIdent])]) -> [String]
hCatTypeGADT (cat,rules)
= ["type"+++gId cat+++"="+++"Tree"+++gId cat++"_",
"data"+++gId cat++"_"]
hDatatypeGADT :: (OIdent, [(OIdent, [OIdent])]) -> [String]
hDatatypeGADT (cat, rules)
| isListCat (cat,rules) = [gId cat+++"::"+++"["++gId (elemCat cat)++"]" +++ "->" +++ t]
| otherwise =
[ gId f +++ "::" +++ concatMap (\a -> gId a +++ "-> ") args ++ t | (f,args) <- rules ]
where t = "Tree" +++ gId cat ++ "_"
----hInstance m ("Cn",_) = "" --- seems to belong to an old applic. AR 18/5/2004
hInstance m (cat,[]) = ""
hInstance m (cat,rules)
| isListCat (cat,rules) =
"instance Gf" +++ gId cat +++ "where" ++++
" gf (" ++ gId cat +++ "[" ++ concat (intersperse "," baseVars) ++ "])"
+++ "=" +++ mkRHS ("Base"++ec) baseVars ++++
" gf (" ++ gId cat +++ "(x:xs)) = "
++ mkRHS ("Cons"++ec) ["x",prParenth (gId cat+++"xs")]
-- no show for GADTs
-- ++++ " gf (" ++ gId cat +++ "xs) = error (\"Bad " ++ cat ++ " value: \" ++ show xs)"
| otherwise =
"instance Gf" +++ gId cat +++ "where" ++
(if length rules == 1 then "" else "\n") +++
foldr1 (\x y -> x ++ "\n" +++ y) [mkInst f xx | (f,xx) <- rules]
where
ec = elemCat cat
baseVars = mkVars (baseSize (cat,rules))
mkInst f xx = let xx' = mkVars (length xx) in "gf " ++
(if length xx == 0 then gId f else prParenth (gId f +++ foldr1 (+++) xx')) +++
"=" +++ mkRHS f xx'
mkVars n = ["x" ++ show i | i <- [1..n]]
mkRHS f vars = "Tr (AC (CId \"" ++ f ++ "\"))" +++
"[" ++ prTList ", " ["gf" +++ x | x <- vars] ++ "]"
----fInstance m ("Cn",_) = "" ---
fInstance m (cat,[]) = ""
fInstance m (cat,rules) =
"instance Fg" +++ gId cat +++ "where" ++++
" fg t =" ++++
" case t of" ++++
foldr1 (\x y -> x ++ "\n" ++ y) [mkInst f xx | (f,xx) <- rules] ++++
" _ -> error (\"no" +++ cat ++ " \" ++ show t)"
where
mkInst f xx =
" Tr (AC (CId \"" ++ f ++ "\")) " ++
"[" ++ prTList "," xx' ++ "]" +++
"->" +++ mkRHS f xx'
where xx' = ["x" ++ show i | (_,i) <- zip xx [1..]]
mkRHS f vars
| isListCat (cat,rules) =
if "Base" `isPrefixOf` f then
gId cat +++ "[" ++ prTList ", " [ "fg" +++ x | x <- vars ] ++ "]"
else
let (i,t) = (init vars,last vars)
in "let" +++ gId cat +++ "xs = fg " ++ t +++ "in" +++
gId cat +++ prParenth (prTList ":" (["fg"+++v | v <- i] ++ ["xs"]))
| otherwise =
gId f +++
prTList " " [prParenth ("fg" +++ x) | x <- vars]
--type HSkeleton = [(OIdent, [(OIdent, [OIdent])])]
hSkeleton :: GFCC -> (String,HSkeleton)
hSkeleton gr =
(pr (absname gr),
[(pr c, [(pr f, map pr cs) | (f, Typ cs _) <- fs]) |
fs@((_, Typ _ c):_) <- fs]
)
where
fs = groupBy valtypg (sortBy valtyps (Map.assocs (funs (abstract gr))))
valtyps (_, Typ _ x) (_, Typ _ y) = compare x y
valtypg (_, Typ _ x) (_, Typ _ y) = x == y
pr (CId c) = c
updateSkeleton :: OIdent -> HSkeleton -> (OIdent, [OIdent]) -> HSkeleton
updateSkeleton cat skel rule =
case skel of
(cat0,rules):rr | cat0 == cat -> (cat0, rule:rules) : rr
(cat0,rules):rr -> (cat0, rules) : updateSkeleton cat rr rule
isListCat :: (OIdent, [(OIdent, [OIdent])]) -> Bool
isListCat (cat,rules) = "List" `isPrefixOf` cat && length rules == 2
&& ("Base"++c) `elem` fs && ("Cons"++c) `elem` fs
where c = elemCat cat
fs = map fst rules
-- | Gets the element category of a list category.
elemCat :: OIdent -> OIdent
elemCat = drop 4
isBaseFun :: OIdent -> Bool
isBaseFun f = "Base" `isPrefixOf` f
isConsFun :: OIdent -> Bool
isConsFun f = "Cons" `isPrefixOf` f
baseSize :: (OIdent, [(OIdent, [OIdent])]) -> Int
baseSize (_,rules) = length bs
where Just (_,bs) = find (("Base" `isPrefixOf`) . fst) rules

View File

@@ -1,78 +0,0 @@
module GF.Canon.GFCC.GenGFCC where
import GF.Canon.GFCC.DataGFCC
import GF.Canon.GFCC.AbsGFCC
import qualified Data.Map as M
import System.Random
-- generate an infinite list of trees exhaustively
generate :: GFCC -> CId -> [Exp]
generate gfcc cat = concatMap (\i -> gener i cat) [0..]
where
gener 0 c = [Tr (AC f) [] | (f, Typ [] _) <- fns c]
gener i c = [
tr |
(f, Typ cs _) <- fns c,
let alts = map (gener (i-1)) cs,
ts <- combinations alts,
let tr = Tr (AC f) ts,
depth tr >= i
]
fns cat =
let fs = maybe [] id $ M.lookup cat $ cats $ abstract gfcc
in [(f,ty) | f <- fs, Just ty <- [M.lookup f $ funs $ abstract gfcc]]
depth tr = case tr of
Tr _ [] -> 1
Tr _ ts -> maximum (map depth ts) + 1
combinations :: [[a]] -> [[a]]
combinations t = case t of
[] -> [[]]
aa:uu -> [a:u | a <- aa, u <- combinations uu]
-- generate an infinite list of trees randomly
generateRandom :: StdGen -> GFCC -> CId -> [Exp]
generateRandom gen gfcc cat = genTrees (randomRs (0.0, 1.0) gen) cat where
timeout = 47 -- give up
genTrees ds0 cat =
let (ds,ds2) = splitAt (timeout+1) ds0 -- for time out, else ds
(t,k) = genTree ds cat
in (if k>timeout then id else (t:))
(genTrees ds2 cat) -- else (drop k ds)
genTree rs = gett rs where
gett ds (CId "String") = (Tr (AS "foo") [], 1)
gett ds (CId "Int") = (Tr (AI 12345) [], 1)
gett [] _ = (Tr (AS "TIMEOUT") [], 1) ----
gett ds cat = case fns cat of
[] -> (Tr AM [],1)
fs -> let
d:ds2 = ds
(f,args) = getf d fs
(ts,k) = getts ds2 args
in (Tr (AC f) ts, k+1)
getf d fs = let lg = (length fs) in
fs !! (floor (d * fromIntegral lg))
getts ds cats = case cats of
c:cs -> let
(t, k) = gett ds c
(ts,ks) = getts (drop k ds) cs
in (t:ts, k + ks)
_ -> ([],0)
fns cat =
let fs = maybe [] id $ M.lookup cat $ cats $ abstract gfcc
in [(f,cs) | f <- fs,
Just (Typ cs _) <- [M.lookup f $ funs $ abstract gfcc]]
-- brute-force parsing method; only returns the first result
-- note: you cannot throw away rules with unknown words from the grammar
-- because it is not known which field in each rule may match the input
parse :: Int -> GFCC -> CId -> [String] -> [Exp]
parse i gfcc cat ws = [t | t <- gen, s <- lins t, words s == ws] where
gen = take i $ generate gfcc cat
lins t = [linearize gfcc lang t | lang <- cncnames gfcc]

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load Diff

View File

@@ -1,190 +0,0 @@
{-# OPTIONS -fno-warn-incomplete-patterns #-}
module GF.Canon.GFCC.PrintGFCC where
-- pretty-printer generated by the BNF converter
import GF.Canon.GFCC.AbsGFCC
import Data.Char
-- the top-level printing method
printTree :: Print a => a -> String
printTree = render . prt 0
type Doc = [ShowS] -> [ShowS]
doc :: ShowS -> Doc
doc = (:)
render :: Doc -> String
render d = rend 0 (map ($ "") $ d []) "" where
rend i ss = case ss of
"[" :ts -> showChar '[' . rend i ts
"(" :ts -> showChar '(' . rend i ts
"{" :ts -> showChar '{' . new (i+1) . rend (i+1) ts
"}" : ";":ts -> new (i-1) . space "}" . showChar ';' . new (i-1) . rend (i-1) ts
"}" :ts -> new (i-1) . showChar '}' . new (i-1) . rend (i-1) ts
";" :ts -> showChar ';' . new i . rend i ts
t : "," :ts -> showString t . space "," . rend i ts
t : ")" :ts -> showString t . showChar ')' . rend i ts
t : "]" :ts -> showString t . showChar ']' . rend i ts
t :ts -> space t . rend i ts
_ -> id
new i = showChar '\n' . replicateS (2*i) (showChar ' ') . dropWhile isSpace
space t = showString t ---- . (\s -> if null s then "" else (' ':s))
parenth :: Doc -> Doc
parenth ss = doc (showChar '(') . ss . doc (showChar ')')
concatS :: [ShowS] -> ShowS
concatS = foldr (.) id
concatD :: [Doc] -> Doc
concatD = foldr (.) id
replicateS :: Int -> ShowS -> ShowS
replicateS n f = concatS (replicate n f)
-- the printer class does the job
class Print a where
prt :: Int -> a -> Doc
prtList :: [a] -> Doc
prtList = concatD . map (prt 0)
instance Print a => Print [a] where
prt _ = prtList
instance Print Char where
prt _ s = doc (showChar '\'' . mkEsc '\'' s . showChar '\'')
prtList s = doc (showChar '"' . concatS (map (mkEsc '"') s) . showChar '"')
mkEsc :: Char -> Char -> ShowS
mkEsc q s = case s of
_ | s == q -> showChar '\\' . showChar s
'\\'-> showString "\\\\"
'\n' -> showString "\\n"
'\t' -> showString "\\t"
_ -> showChar s
prPrec :: Int -> Int -> Doc -> Doc
prPrec i j = if j<i then parenth else id
instance Print Integer where
prt _ x = doc (shows x)
instance Print Int where
prt _ x = doc (shows x)
instance Print Double where
prt _ x = doc (shows x)
instance Print CId where
prt _ (CId i) = doc (showString i)
prtList es = case es of
[] -> (concatD [])
[x] -> (concatD [prt 0 x])
x:xs -> (concatD [prt 0 x , doc (showString ",") , prt 0 xs])
instance Print Grammar where
prt i e = case e of
Grm header abstract concretes -> prPrec i 0 (concatD [prt 0 header , doc (showString ";") , prt 0 abstract , doc (showString ";") , prt 0 concretes])
instance Print Header where
prt i e = case e of
Hdr cid cids -> prPrec i 0 (concatD [doc (showString "grammar ") , prt 0 cid , doc (showString "(") , prt 0 cids , doc (showString ")")])
instance Print Abstract where
prt i e = case e of
Abs absdefs -> prPrec i 0 (concatD [doc (showString "abstract ") , doc (showString "{") , prt 0 absdefs , doc (showString "}")])
instance Print Concrete where
prt i e = case e of
Cnc cid cncdefs -> prPrec i 0 (concatD [doc (showString "concrete ") , prt 0 cid , doc (showString "{") , prt 0 cncdefs , doc (showString "}")])
prtList es = case es of
[] -> (concatD [])
x:xs -> (concatD [prt 0 x , doc (showString ";") , prt 0 xs])
instance Print AbsDef where
prt i e = case e of
Fun cid type' exp -> prPrec i 0 (concatD [prt 0 cid , doc (showString ":") , prt 0 type' , doc (showString "=") , prt 0 exp])
prtList es = case es of
[] -> (concatD [])
x:xs -> (concatD [prt 0 x , doc (showString ";") , prt 0 xs])
instance Print CncDef where
prt i e = case e of
Lin cid term -> prPrec i 0 (concatD [prt 0 cid , doc (showString "=") , prt 0 term])
prtList es = case es of
[] -> (concatD [])
x:xs -> (concatD [prt 0 x , doc (showString ";") , prt 0 xs])
instance Print Type where
prt i e = case e of
Typ cids cid -> prPrec i 0 (concatD [prt 0 cids , doc (showString "->") , prt 0 cid])
instance Print Exp where
prt i e = case e of
Tr atom exps -> prPrec i 0 (concatD [doc (showString "(") , prt 0 atom , prt 0 exps , doc (showString ")")])
prtList es = case es of
[] -> (concatD [])
x:xs -> (concatD [prt 0 x , prt 0 xs])
instance Print Atom where
prt i e = case e of
AC cid -> prPrec i 0 (concatD [prt 0 cid])
AS str -> prPrec i 0 (concatD [prt 0 str])
AI n -> prPrec i 0 (concatD [prt 0 n])
AF d -> prPrec i 0 (concatD [prt 0 d])
AM -> prPrec i 0 (concatD [doc (showString "?")])
instance Print Term where
prt i e = case e of
R terms -> prPrec i 0 (concatD [doc (showString "[") , prt 0 terms , doc (showString "]")])
P term0 term -> prPrec i 0 (concatD [doc (showString "(") , prt 0 term0 , doc (showString "!") , prt 0 term , doc (showString ")")])
S terms -> prPrec i 0 (concatD [doc (showString "(") , prt 0 terms , doc (showString ")")])
K tokn -> prPrec i 0 (concatD [prt 0 tokn])
V n -> prPrec i 0 (concatD [doc (showString "$") , prt 0 n])
C n -> prPrec i 0 (concatD [prt 0 n])
F cid -> prPrec i 0 (concatD [prt 0 cid])
FV terms -> prPrec i 0 (concatD [doc (showString "[|") , prt 0 terms , doc (showString "|]")])
W str term -> prPrec i 0 (concatD [doc (showString "(") , prt 0 str , doc (showString "+") , prt 0 term , doc (showString ")")])
RP term0 term -> prPrec i 0 (concatD [doc (showString "(") , prt 0 term0 , doc (showString "@") , prt 0 term , doc (showString ")")])
TM -> prPrec i 0 (concatD [doc (showString "?")])
L cid term -> prPrec i 0 (concatD [doc (showString "(") , prt 0 cid , doc (showString "->") , prt 0 term , doc (showString ")")])
BV cid -> prPrec i 0 (concatD [doc (showString "#") , prt 0 cid])
prtList es = case es of
[] -> (concatD [])
[x] -> (concatD [prt 0 x])
x:xs -> (concatD [prt 0 x , doc (showString ",") , prt 0 xs])
instance Print Tokn where
prt i e = case e of
KS str -> prPrec i 0 (concatD [prt 0 str])
KP strs variants -> prPrec i 0 (concatD [doc (showString "[") , doc (showString "pre") , prt 0 strs , doc (showString "[") , prt 0 variants , doc (showString "]") , doc (showString "]")])
instance Print Variant where
prt i e = case e of
Var strs0 strs -> prPrec i 0 (concatD [prt 0 strs0 , doc (showString "/") , prt 0 strs])
prtList es = case es of
[] -> (concatD [])
[x] -> (concatD [prt 0 x])
x:xs -> (concatD [prt 0 x , doc (showString ",") , prt 0 xs])

View File

@@ -1,75 +0,0 @@
module Main where
import GF.Canon.GFCC.GenGFCC
import GF.Canon.GFCC.DataGFCC
import GF.Canon.GFCC.AbsGFCC
import GF.Canon.GFCC.ParGFCC
import GF.Canon.GFCC.PrintGFCC
import GF.Canon.GFCC.ErrM
--import GF.Data.Operations
import Data.Map
import System.Random (newStdGen)
import System
-- Simple translation application built on GFCC. AR 7/9/2006
main :: IO ()
main = do
file:_ <- getArgs
grammar <- file2gfcc file
putStrLn $ statGFCC grammar
loop grammar
loop :: GFCC -> IO ()
loop grammar = do
s <- getLine
if s == "quit" then return () else do
treat grammar s
loop grammar
treat :: GFCC -> String -> IO ()
treat grammar s = case words s of
"gt":cat:n:_ -> do
mapM_ prlinonly $ take (read n) $ generate grammar (CId cat)
"gtt":cat:n:_ -> do
mapM_ prlin $ take (read n) $ generate grammar (CId cat)
"gr":cat:n:_ -> do
gen <- newStdGen
mapM_ prlinonly $ take (read n) $ generateRandom gen grammar (CId cat)
"grt":cat:n:_ -> do
gen <- newStdGen
mapM_ prlin $ take (read n) $ generateRandom gen grammar (CId cat)
"p":cat:n:ws -> do
case parse (read n) grammar (CId cat) ws of
t:_ -> prlin t
_ -> putStrLn "no parse found"
_ -> lins $ readExp s
where
lins t = mapM_ (lint t) $ cncnames grammar
lint t lang = do
putStrLn $ printTree $ linExp grammar lang t
lin t lang
lin t lang = do
putStrLn $ linearize grammar lang t
prlins t = do
putStrLn $ printTree t
lins t
prlin t = do
putStrLn $ printTree t
prlinonly t
prlinonly t = mapM_ (lin t) $ cncnames grammar
--- should be in an API
file2gfcc :: FilePath -> IO GFCC
file2gfcc f =
readFile f >>= err (error "no parse") (return . mkGFCC) . pGrammar . myLexer
readExp :: String -> Exp
readExp = err (const exp0) id . (pExp . myLexer)
err f g ex = case ex of
Ok x -> g x
Bad s -> f s

View File

@@ -1,74 +0,0 @@
module Main where
import GF.Canon.GFCC.GFCCAPI
import qualified GF.Canon.GFCC.GenGFCC as G ---
import GF.Canon.GFCC.AbsGFCC (CId(CId)) ---
import System.Random (newStdGen)
import System (getArgs)
import Data.Char (isDigit)
-- Simple translation application built on GFCC. AR 7/9/2006 -- 19/9/2007
main :: IO ()
main = do
file:_ <- getArgs
grammar <- file2grammar file
printHelp grammar
loop grammar
loop :: MultiGrammar -> IO ()
loop grammar = do
s <- getLine
if s == "q" then return () else do
treat grammar s
loop grammar
printHelp grammar = do
putStrLn $ "languages: " ++ unwords (languages grammar)
putStrLn $ "categories: " ++ unwords (categories grammar)
putStrLn commands
commands = unlines [
"Commands:",
" (gt | gtt | gr | grt) Cat Num - generate all or random",
" p Lang Cat String - parse (unquoted) string",
" l Tree - linearize in all languages",
" h - help",
" q - quit"
]
treat :: MultiGrammar -> String -> IO ()
treat mgr s = case words s of
"gt" :cat:n:_ -> mapM_ prlinonly $ take (read1 n) $ generateAll mgr cat
"gtt":cat:n:_ -> mapM_ prlin $ take (read1 n) $ generateAll mgr cat
"gr" :cat:n:_ -> generateRandom mgr cat >>= mapM_ prlinonly . take (read1 n)
"grt":cat:n:_ -> generateRandom mgr cat >>= mapM_ prlin . take (read1 n)
"p":lang:cat:ws -> do
let ts = parse mgr lang cat $ unwords ws
mapM_ (putStrLn . showTree) ts
"search":cat:n:ws -> do
case G.parse (read n) grammar (CId cat) ws of
t:_ -> prlin t
_ -> putStrLn "no parse found"
"h":_ -> printHelp mgr
_ -> lins $ readTree mgr s
where
grammar = gfcc mgr
langs = languages mgr
lins t = mapM_ (lint t) $ langs
lint t lang = do
---- putStrLn $ showTree $ linExp grammar lang t
lin t lang
lin t lang = do
putStrLn $ linearize mgr lang t
prlins t = do
putStrLn $ showTree t
lins t
prlin t = do
putStrLn $ showTree t
prlinonly t
prlinonly t = mapM_ (lin t) $ langs
read1 s = if all isDigit s then read s else 1

View File

@@ -1,94 +0,0 @@
module GF.Canon.GFCC.SkelGFCC where
-- Haskell module generated by the BNF converter
import GF.Canon.GFCC.AbsGFCC
import GF.Canon.GFCC.ErrM
type Result = Err String
failure :: Show a => a -> Result
failure x = Bad $ "Undefined case: " ++ show x
transCId :: CId -> Result
transCId x = case x of
CId str -> failure x
transGrammar :: Grammar -> Result
transGrammar x = case x of
Grm header abstract concretes -> failure x
transHeader :: Header -> Result
transHeader x = case x of
Hdr cid cids -> failure x
transAbstract :: Abstract -> Result
transAbstract x = case x of
Abs absdefs -> failure x
transConcrete :: Concrete -> Result
transConcrete x = case x of
Cnc cid cncdefs -> failure x
transAbsDef :: AbsDef -> Result
transAbsDef x = case x of
Fun cid type' exp -> failure x
transCncDef :: CncDef -> Result
transCncDef x = case x of
Lin cid term -> failure x
transType :: Type -> Result
transType x = case x of
Typ cids cid -> failure x
transExp :: Exp -> Result
transExp x = case x of
Tr atom exps -> failure x
transAtom :: Atom -> Result
transAtom x = case x of
AC cid -> failure x
AS str -> failure x
AI n -> failure x
AF d -> failure x
AM -> failure x
transTerm :: Term -> Result
transTerm x = case x of
R terms -> failure x
P term0 term -> failure x
S terms -> failure x
K tokn -> failure x
V n -> failure x
C n -> failure x
F cid -> failure x
FV terms -> failure x
W str term -> failure x
RP term0 term -> failure x
TM -> failure x
L cid term -> failure x
BV cid -> failure x
transTokn :: Tokn -> Result
transTokn x = case x of
KS str -> failure x
KP strs variants -> failure x
transVariant :: Variant -> Result
transVariant x = case x of
Var strs0 strs -> failure x

View File

@@ -1,64 +0,0 @@
-- to test GFCC compilation
flags coding=utf8 ;
cat S ; NP ; N ; VP ;
fun Pred : NP -> VP -> S ;
fun Pred2 : NP -> VP -> NP -> S ;
fun Det, Dets : N -> NP ;
fun Mina, Sina, Me, Te : NP ;
fun Raha, Paska, Pallo : N ;
fun Puhua, Munia, Sanoa : VP ;
param Person = P1 | P2 | P3 ;
param Number = Sg | Pl ;
param Case = Nom | Part ;
param NForm = NF Number Case ;
param VForm = VF Number Person ;
lincat N = Noun ;
lincat VP = Verb ;
oper Noun = {s : NForm => Str} ;
oper Verb = {s : VForm => Str} ;
lincat NP = {s : Case => Str ; a : {n : Number ; p : Person}} ;
lin Pred np vp = {s = np.s ! Nom ++ vp.s ! VF np.a.n np.a.p} ;
lin Pred2 np vp ob = {s = np.s ! Nom ++ vp.s ! VF np.a.n np.a.p ++ ob.s ! Part} ;
lin Det no = {s = \\c => no.s ! NF Sg c ; a = {n = Sg ; p = P3}} ;
lin Dets no = {s = \\c => no.s ! NF Pl c ; a = {n = Pl ; p = P3}} ;
lin Mina = {s = table Case ["minä" ; "minua"] ; a = {n = Sg ; p = P1}} ;
lin Te = {s = table Case ["te" ; "teitä"] ; a = {n = Pl ; p = P2}} ;
lin Sina = {s = table Case ["sinä" ; "sinua"] ; a = {n = Sg ; p = P2}} ;
lin Me = {s = table Case ["me" ; "meitä"] ; a = {n = Pl ; p = P1}} ;
lin Raha = mkN "raha" ;
lin Paska = mkN "paska" ;
lin Pallo = mkN "pallo" ;
lin Puhua = mkV "puhu" ;
lin Munia = mkV "muni" ;
lin Sanoa = mkV "sano" ;
oper mkN : Str -> Noun = \raha -> {
s = table {
NF Sg Nom => raha ;
NF Sg Part => raha + "a" ;
NF Pl Nom => raha + "t" ;
NF Pl Part => Predef.tk 1 raha + "oja"
}
} ;
oper mkV : Str -> Verb = \puhu -> {
s = table {
VF Sg P1 => puhu + "n" ;
VF Sg P2 => puhu + "t" ;
VF Sg P3 => puhu + Predef.dp 1 puhu ;
VF Pl P1 => puhu + "mme" ;
VF Pl P2 => puhu + "tte" ;
VF Pl P3 => puhu + "vat"
}
} ;

View File

@@ -1,58 +0,0 @@
-- automatically generated by BNF Converter
module Main where
import IO ( stdin, hGetContents )
import System ( getArgs, getProgName )
import GF.Canon.GFCC.LexGFCC
import GF.Canon.GFCC.ParGFCC
import GF.Canon.GFCC.SkelGFCC
import GF.Canon.GFCC.PrintGFCC
import GF.Canon.GFCC.AbsGFCC
import GF.Canon.GFCC.ErrM
type ParseFun a = [Token] -> Err a
myLLexer = myLexer
type Verbosity = Int
putStrV :: Verbosity -> String -> IO ()
putStrV v s = if v > 1 then putStrLn s else return ()
runFile :: (Print a, Show a) => Verbosity -> ParseFun a -> FilePath -> IO ()
runFile v p f = putStrLn f >> readFile f >>= run v p
run :: (Print a, Show a) => Verbosity -> ParseFun a -> String -> IO ()
run v p s = let ts = myLLexer s in case p ts of
Bad s -> do putStrLn "\nParse Failed...\n"
putStrV v "Tokens:"
putStrV v $ show ts
putStrLn s
Ok tree -> do putStrLn "\nParse Successful!"
showTree v tree
showTree :: (Show a, Print a) => Int -> a -> IO ()
showTree v tree
= do
putStrV v $ "\n[Abstract Syntax]\n\n" ++ show tree
putStrV v $ "\n[Linearized tree]\n\n" ++ printTree tree
main :: IO ()
main = do args <- getArgs
case args of
[] -> hGetContents stdin >>= run 2 pGrammar
"-s":fs -> mapM_ (runFile 0 pGrammar) fs
fs -> mapM_ (runFile 2 pGrammar) fs

View File

@@ -1,13 +0,0 @@
concrete Eng of Ex = {
lincat
S = {s : Str} ;
NP = {s : Str ; n : Num} ;
VP = {s : Num => Str} ;
param
Num = Sg | Pl ;
lin
Pred np vp = {s = np.s ++ vp.s ! np.n} ;
She = {s = "she" ; n = Sg} ;
They = {s = "they" ; n = Pl} ;
Sleep = {s = table {Sg => "sleeps" ; Pl => "sleep"}} ;
}

View File

@@ -1,8 +0,0 @@
abstract Ex = {
cat
S ; NP ; VP ;
fun
Pred : NP -> VP -> S ;
She, They : NP ;
Sleep : VP ;
}

View File

@@ -1,13 +0,0 @@
concrete Swe of Ex = {
lincat
S = {s : Str} ;
NP = {s : Str} ;
VP = {s : Str} ;
param
Num = Sg | Pl ;
lin
Pred np vp = {s = np.s ++ vp.s} ;
She = {s = "hon"} ;
They = {s = "de"} ;
Sleep = {s = "sover"} ;
}

View File

@@ -1,842 +0,0 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
<HTML>
<HEAD>
<META NAME="generator" CONTENT="http://txt2tags.sf.net">
<TITLE>The GFCC Grammar Format</TITLE>
</HEAD><BODY BGCOLOR="white" TEXT="black">
<P ALIGN="center"><CENTER><H1>The GFCC Grammar Format</H1>
<FONT SIZE="4">
<I>Aarne Ranta</I><BR>
October 19, 2006
</FONT></CENTER>
<P></P>
<HR NOSHADE SIZE=1>
<P></P>
<UL>
<LI><A HREF="#toc1">What is GFCC</A>
<LI><A HREF="#toc2">GFCC vs. GFC</A>
<LI><A HREF="#toc3">The syntax of GFCC files</A>
<UL>
<LI><A HREF="#toc4">Top level</A>
<LI><A HREF="#toc5">Abstract syntax</A>
<LI><A HREF="#toc6">Concrete syntax</A>
</UL>
<LI><A HREF="#toc7">The semantics of concrete syntax terms</A>
<UL>
<LI><A HREF="#toc8">Linearization and realization</A>
<LI><A HREF="#toc9">Term evaluation</A>
<LI><A HREF="#toc10">The special term constructors</A>
</UL>
<LI><A HREF="#toc11">Compiling to GFCC</A>
<UL>
<LI><A HREF="#toc12">Problems in GFCC compilation</A>
<LI><A HREF="#toc13">The representation of linearization types</A>
<LI><A HREF="#toc14">Running the compiler and the GFCC interpreter</A>
</UL>
<LI><A HREF="#toc15">The reference interpreter</A>
<LI><A HREF="#toc16">Interpreter in C++</A>
<LI><A HREF="#toc17">Some things to do</A>
</UL>
<P></P>
<HR NOSHADE SIZE=1>
<P></P>
<P>
Author's address:
<A HREF="http://www.cs.chalmers.se/~aarne"><CODE>http://www.cs.chalmers.se/~aarne</CODE></A>
</P>
<P>
History:
</P>
<UL>
<LI>19 Oct: translation of lincats, new figures on C++
<LI>3 Oct 2006: first version
</UL>
<A NAME="toc1"></A>
<H2>What is GFCC</H2>
<P>
GFCC is a low-level format for GF grammars. Its aim is to contain the minimum
that is needed to process GF grammars at runtime. This minimality has three
advantages:
</P>
<UL>
<LI>compact grammar files and run-time objects
<LI>time and space efficient processing
<LI>simple definition of interpreters
</UL>
<P>
The idea is that all embedded GF applications are compiled to GFCC.
The GF system would be primarily used as a compiler and as a grammar
development tool.
</P>
<P>
Since GFCC is implemented in BNFC, a parser of the format is readily
available for C, C++, Haskell, Java, and OCaml. Also an XML
representation is generated in BNFC. A
<A HREF="../">reference implementation</A>
of linearization and some other functions has been written in Haskell.
</P>
<A NAME="toc2"></A>
<H2>GFCC vs. GFC</H2>
<P>
GFCC is aimed to replace GFC as the run-time grammar format. GFC was designed
to be a run-time format, but also to
support separate compilation of grammars, i.e.
to store the results of compiling
individual GF modules. But this means that GFC has to contain extra information,
such as type annotations, which is only needed in compilation and not at
run-time. In particular, the pattern matching syntax and semantics of GFC is
complex and therefore difficult to implement in new platforms.
</P>
<P>
The main differences of GFCC compared with GFC can be summarized as follows:
</P>
<UL>
<LI>there are no modules, and therefore no qualified names
<LI>a GFCC grammar is multilingual, and consists of a common abstract syntax
together with one concrete syntax per language
<LI>records and tables are replaced by arrays
<LI>record labels and parameter values are replaced by integers
<LI>record projection and table selection are replaced by array indexing
<LI>there is (so far) no support for dependent types or higher-order abstract
syntax (which would be easy to add, but make interpreters much more difficult
to write)
</UL>
<P>
Here is an example of a GF grammar, consisting of three modules,
as translated to GFCC. The representations are aligned, with the exceptions
due to the alphabetical sorting of GFCC grammars.
</P>
<PRE>
grammar Ex(Eng,Swe);
abstract Ex = { abstract {
cat
S ; NP ; VP ;
fun
Pred : NP -&gt; VP -&gt; S ; Pred : NP,VP -&gt; S = (Pred);
She, They : NP ; She : -&gt; NP = (She);
Sleep : VP ; Sleep : -&gt; VP = (Sleep);
They : -&gt; NP = (They);
} } ;
concrete Eng of Ex = { concrete Eng {
lincat
S = {s : Str} ;
NP = {s : Str ; n : Num} ;
VP = {s : Num =&gt; Str} ;
param
Num = Sg | Pl ;
lin
Pred np vp = { Pred = [(($0!1),(($1!0)!($0!0)))];
s = np.s ++ vp.s ! np.n} ;
She = {s = "she" ; n = Sg} ; She = [0, "she"];
They = {s = "they" ; n = Pl} ;
Sleep = {s = table { Sleep = [("sleep" + ["s",""])];
Sg =&gt; "sleeps" ;
Pl =&gt; "sleep" They = [1, "they"];
} } ;
} ;
}
concrete Swe of Ex = { concrete Swe {
lincat
S = {s : Str} ;
NP = {s : Str} ;
VP = {s : Str} ;
param
Num = Sg | Pl ;
lin
Pred np vp = { Pred = [(($0!0),($1!0))];
s = np.s ++ vp.s} ;
She = {s = "hon"} ; She = ["hon"];
They = {s = "de"} ; They = ["de"];
Sleep = {s = "sover"} ; Sleep = ["sover"];
} } ;
</PRE>
<P></P>
<A NAME="toc3"></A>
<H2>The syntax of GFCC files</H2>
<A NAME="toc4"></A>
<H3>Top level</H3>
<P>
A grammar has a header telling the name of the abstract syntax
(often specifying an application domain), and the names of
the concrete languages. The abstract syntax and the concrete
syntaxes themselves follow.
</P>
<PRE>
Grammar ::= Header ";" Abstract ";" [Concrete] ;
Header ::= "grammar" CId "(" [CId] ")" ;
Abstract ::= "abstract" "{" [AbsDef] "}" ;
Concrete ::= "concrete" CId "{" [CncDef] "}" ;
</PRE>
<P>
Abstract syntax judgements give typings and semantic definitions.
Concrete syntax judgements give linearizations.
</P>
<PRE>
AbsDef ::= CId ":" Type "=" Exp ;
CncDef ::= CId "=" Term ;
</PRE>
<P>
Also flags are possible, local to each "module" (i.e. abstract and concretes).
</P>
<PRE>
AbsDef ::= "%" CId "=" String ;
CncDef ::= "%" CId "=" String ;
</PRE>
<P>
For the run-time system, the reference implementation in Haskell
uses a structure that gives efficient look-up:
</P>
<PRE>
data GFCC = GFCC {
absname :: CId ,
cncnames :: [CId] ,
abstract :: Abstr ,
concretes :: Map CId Concr
}
data Abstr = Abstr {
funs :: Map CId Type, -- find the type of a fun
cats :: Map CId [CId] -- find the funs giving a cat
}
type Concr = Map CId Term
</PRE>
<P></P>
<A NAME="toc5"></A>
<H3>Abstract syntax</H3>
<P>
Types are first-order function types built from
category symbols. Syntax trees (<CODE>Exp</CODE>) are
rose trees with the head (<CODE>Atom</CODE>) either a function
constant, a metavariable, or a string, integer, or float
literal.
</P>
<PRE>
Type ::= [CId] "-&gt;" CId ;
Exp ::= "(" Atom [Exp] ")" ;
Atom ::= CId ; -- function constant
Atom ::= "?" ; -- metavariable
Atom ::= String ; -- string literal
Atom ::= Integer ; -- integer literal
Atom ::= Double ; -- float literal
</PRE>
<P></P>
<A NAME="toc6"></A>
<H3>Concrete syntax</H3>
<P>
Linearization terms (<CODE>Term</CODE>) are built as follows.
Constructor names are shown to make the later code
examples readable.
</P>
<PRE>
R. Term ::= "[" [Term] "]" ; -- array
P. Term ::= "(" Term "!" Term ")" ; -- access to indexed field
S. Term ::= "(" [Term] ")" ; -- sequence with ++
K. Term ::= Tokn ; -- token
V. Term ::= "$" Integer ; -- argument
C. Term ::= Integer ; -- array index
FV. Term ::= "[|" [Term] "|]" ; -- free variation
TM. Term ::= "?" ; -- linearization of metavariable
</PRE>
<P>
Tokens are strings or (maybe obsolescent) prefix-dependent
variant lists.
</P>
<PRE>
KS. Tokn ::= String ;
KP. Tokn ::= "[" "pre" [String] "[" [Variant] "]" "]" ;
Var. Variant ::= [String] "/" [String] ;
</PRE>
<P>
Three special forms of terms are introduced by the compiler
as optimizations. They can in principle be eliminated, but
their presence makes grammars much more compact. Their semantics
will be explained in a later section.
</P>
<PRE>
F. Term ::= CId ; -- global constant
W. Term ::= "(" String "+" Term ")" ; -- prefix + suffix table
RP. Term ::= "(" Term "@" Term ")"; -- record parameter alias
</PRE>
<P>
Identifiers are like <CODE>Ident</CODE> in GF and GFC, except that
the compiler produces constants prefixed with <CODE>_</CODE> in
the common subterm elimination optimization.
</P>
<PRE>
token CId (('_' | letter) (letter | digit | '\'' | '_')*) ;
</PRE>
<P></P>
<A NAME="toc7"></A>
<H2>The semantics of concrete syntax terms</H2>
<A NAME="toc8"></A>
<H3>Linearization and realization</H3>
<P>
The linearization algorithm is essentially the same as in
GFC: a tree is linearized by evaluating its linearization term
in the environment of the linearizations of the subtrees.
Literal atoms are linearized in the obvious way.
The function also needs to know the language (i.e. concrete syntax)
in which linearization is performed.
</P>
<PRE>
linExp :: GFCC -&gt; CId -&gt; Exp -&gt; Term
linExp mcfg lang tree@(Tr at trees) = case at of
AC fun -&gt; comp (Prelude.map lin trees) $ look fun
AS s -&gt; R [kks (show s)] -- quoted
AI i -&gt; R [kks (show i)]
AF d -&gt; R [kks (show d)]
AM -&gt; TM
where
lin = linExp mcfg lang
comp = compute mcfg lang
look = lookLin mcfg lang
</PRE>
<P>
The result of linearization is usually a record, which is realized as
a string using the following algorithm.
</P>
<PRE>
realize :: Term -&gt; String
realize trm = case trm of
R (t:_) -&gt; realize t
S ss -&gt; unwords $ Prelude.map realize ss
K (KS s) -&gt; s
K (KP s _) -&gt; unwords s ---- prefix choice TODO
W s t -&gt; s ++ realize t
FV (t:_) -&gt; realize t
TM -&gt; "?"
</PRE>
<P>
Since the order of record fields is not necessarily
the same as in GF source,
this realization does not work securely for
categories whose lincats more than one field.
</P>
<A NAME="toc9"></A>
<H3>Term evaluation</H3>
<P>
Evaluation follows call-by-value order, with two environments
needed:
</P>
<UL>
<LI>the grammar (a concrete syntax) to give the global constants
<LI>an array of terms to give the subtree linearizations
</UL>
<P>
The code is presented in one-level pattern matching, to
enable reimplementations in languages that do not permit
deep patterns (such as Java and C++).
</P>
<PRE>
compute :: GFCC -&gt; CId -&gt; [Term] -&gt; Term -&gt; Term
compute mcfg lang args = comp where
comp trm = case trm of
P r p -&gt; proj (comp r) (comp p)
RP i t -&gt; RP (comp i) (comp t)
W s t -&gt; W s (comp t)
R ts -&gt; R $ Prelude.map comp ts
V i -&gt; idx args (fromInteger i) -- already computed
F c -&gt; comp $ look c -- not computed (if contains V)
FV ts -&gt; FV $ Prelude.map comp ts
S ts -&gt; S $ Prelude.filter (/= S []) $ Prelude.map comp ts
_ -&gt; trm
look = lookLin mcfg lang
idx xs i = xs !! i
proj r p = case (r,p) of
(_, FV ts) -&gt; FV $ Prelude.map (proj r) ts
(W s t, _) -&gt; kks (s ++ getString (proj t p))
_ -&gt; comp $ getField r (getIndex p)
getString t = case t of
K (KS s) -&gt; s
_ -&gt; trace ("ERROR in grammar compiler: string from "++ show t) "ERR"
getIndex t = case t of
C i -&gt; fromInteger i
RP p _ -&gt; getIndex p
TM -&gt; 0 -- default value for parameter
_ -&gt; trace ("ERROR in grammar compiler: index from " ++ show t) 0
getField t i = case t of
R rs -&gt; idx rs i
RP _ r -&gt; getField r i
TM -&gt; TM
_ -&gt; trace ("ERROR in grammar compiler: field from " ++ show t) t
</PRE>
<P></P>
<A NAME="toc10"></A>
<H3>The special term constructors</H3>
<P>
The three forms introduced by the compiler may a need special
explanation.
</P>
<P>
Global constants
</P>
<PRE>
Term ::= CId ;
</PRE>
<P>
are shorthands for complex terms. They are produced by the
compiler by (iterated) common subexpression elimination.
They are often more powerful than hand-devised code sharing in the source
code. They could be computed off-line by replacing each identifier by
its definition.
</P>
<P>
Prefix-suffix tables
</P>
<PRE>
Term ::= "(" String "+" Term ")" ;
</PRE>
<P>
represent tables of word forms divided to the longest common prefix
and its array of suffixes. In the example grammar above, we have
</P>
<PRE>
Sleep = [("sleep" + ["s",""])]
</PRE>
<P>
which in fact is equal to the array of full forms
</P>
<PRE>
["sleeps", "sleep"]
</PRE>
<P>
The power of this construction comes from the fact that suffix sets
tend to be repeated in a language, and can therefore be collected
by common subexpression elimination. It is this technique that
explains the used syntax rather than the more accurate
</P>
<PRE>
"(" String "+" [String] ")"
</PRE>
<P>
since we want the suffix part to be a <CODE>Term</CODE> for the optimization to
take effect.
</P>
<P>
The most curious construct of GFCC is the parameter array alias,
</P>
<PRE>
Term ::= "(" Term "@" Term ")";
</PRE>
<P>
This form is used as the value of parameter records, such as the type
</P>
<PRE>
{n : Number ; p : Person}
</PRE>
<P>
The problem with parameter records is their double role.
They can be used like parameter values, as indices in selection,
</P>
<PRE>
VP.s ! {n = Sg ; p = P3}
</PRE>
<P>
but also as records, from which parameters can be projected:
</P>
<PRE>
{n = Sg ; p = P3}.n
</PRE>
<P>
Whichever use is selected as primary, a prohibitively complex
case expression must be generated at compilation to GFCC to get the
other use. The adopted
solution is to generate a pair containing both a parameter value index
and an array of indices of record fields. For instance, if we have
</P>
<PRE>
param Number = Sg | Pl ; Person = P1 | P2 | P3 ;
</PRE>
<P>
we get the encoding
</P>
<PRE>
{n = Sg ; p = P3} ---&gt; (2 @ [0,2])
</PRE>
<P>
The GFCC computation rules are essentially
</P>
<PRE>
(t ! (i @ _)) = (t ! i)
((_ @ r) ! j) =(r ! j)
</PRE>
<P></P>
<A NAME="toc11"></A>
<H2>Compiling to GFCC</H2>
<P>
Compilation to GFCC is performed by the GF grammar compiler, and
GFCC interpreters need not know what it does. For grammar writers,
however, it might be interesting to know what happens to the grammars
in the process.
</P>
<P>
The compilation phases are the following
</P>
<OL>
<LI>translate GF source to GFC, as always in GF
<LI>undo GFC back-end optimizations
<LI>perform the <CODE>values</CODE> optimization to normalize tables
<LI>create a symbol table mapping the GFC parameter and record types to
fixed-size arrays, and parameter values and record labels to integers
<LI>traverse the linearization rules replacing parameters and labels by integers
<LI>reorganize the created GFC grammar so that it has just one abstract syntax
and one concrete syntax per language
<LI>apply UTF8 encoding to the grammar, if not yet applied (this is told by the
<CODE>coding</CODE> flag)
<LI>translate the GFC syntax tree to a GFCC syntax tree, using a simple
compositional mapping
<LI>perform the word-suffix optimization on GFCC linearization terms
<LI>perform subexpression elimination on each concrete syntax module
<LI>print out the GFCC code
</OL>
<P>
Notice that a major part of the compilation is done within GFC, so that
GFC-related tasks (such as parser generation) could be performed by
using the old algorithms.
</P>
<A NAME="toc12"></A>
<H3>Problems in GFCC compilation</H3>
<P>
Two major problems had to be solved in compiling GFC to GFCC:
</P>
<UL>
<LI>consistent order of tables and records, to permit the array translation
<LI>run-time variables in complex parameter values.
</UL>
<P>
The current implementation is still experimental and may fail
to generate correct code. Any errors remaining are likely to be
related to the two problems just mentioned.
</P>
<P>
The order problem is solved in different ways for tables and records.
For tables, the <CODE>values</CODE> optimization of GFC already manages to
maintain a canonical order. But this order can be destroyed by the
<CODE>share</CODE> optimization. To make sure that GFCC compilation works properly,
it is safest to recompile the GF grammar by using the <CODE>values</CODE>
optimization flag.
</P>
<P>
Records can be canonically ordered by sorting them by labels.
In fact, this was done in connection of the GFCC work as a part
of the GFC generation, to guarantee consistency. This means that
e.g. the <CODE>s</CODE> field will in general no longer appear as the first
field, even if it does so in the GF source code. But relying on the
order of fields in a labelled record would be misplaced anyway.
</P>
<P>
The canonical form of records is further complicated by lock fields,
i.e. dummy fields of form <CODE>lock_C = &lt;&gt;</CODE>, which are added to grammar
libraries to force intensionality of linearization types. The problem
is that the absence of a lock field only generates a warning, not
an error. Therefore a GFC grammar can contain objects of the same
type with and without a lock field. This problem was solved in GFCC
generation by just removing all lock fields (defined as fields whose
type is the empty record type). This has the further advantage of
(slightly) reducing the grammar size. More importantly, it is safe
to remove lock fields, because they are never used in computation,
and because intensional types are only needed in grammars reused
as libraries, not in grammars used at runtime.
</P>
<P>
While the order problem is rather bureaucratic in nature, run-time
variables are an interesting problem. They arise in the presence
of complex parameter values, created by argument-taking constructors
and parameter records. To give an example, consider the GF parameter
type system
</P>
<PRE>
Number = Sg | Pl ;
Person = P1 | P2 | P3 ;
Agr = Ag Number Person ;
</PRE>
<P>
The values can be translated to integers in the expected way,
</P>
<PRE>
Sg = 0, Pl = 1
P1 = 0, P2 = 1, P3 = 2
Ag Sg P1 = 0, Ag Sg P2 = 1, Ag Sg P3 = 2,
Ag Pl P1 = 3, Ag Pl P2 = 4, Ag Pl P3 = 5
</PRE>
<P>
However, an argument of <CODE>Agr</CODE> can be a run-time variable, as in
</P>
<PRE>
Ag np.n P3
</PRE>
<P>
This expression must first be translated to a case expression,
</P>
<PRE>
case np.n of {
0 =&gt; 2 ;
1 =&gt; 5
}
</PRE>
<P>
which can then be translated to the GFCC term
</P>
<PRE>
([2,5] ! ($0 ! $1))
</PRE>
<P>
assuming that the variable <CODE>np</CODE> is the first argument and that its
<CODE>Number</CODE> field is the second in the record.
</P>
<P>
This transformation of course has to be performed recursively, since
there can be several run-time variables in a parameter value:
</P>
<PRE>
Ag np.n np.p
</PRE>
<P>
A similar transformation would be possible to deal with the double
role of parameter records discussed above. Thus the type
</P>
<PRE>
RNP = {n : Number ; p : Person}
</PRE>
<P>
could be uniformly translated into the set <CODE>{0,1,2,3,4,5}</CODE>
as <CODE>Agr</CODE> above. Selections would be simple instances of indexing.
But any projection from the record should be translated into
a case expression,
</P>
<PRE>
rnp.n ===&gt;
case rnp of {
0 =&gt; 0 ;
1 =&gt; 0 ;
2 =&gt; 0 ;
3 =&gt; 1 ;
4 =&gt; 1 ;
5 =&gt; 1
}
</PRE>
<P>
To avoid the code bloat resulting from this, we chose the alias representation
which is easy enough to deal with in interpreters.
</P>
<A NAME="toc13"></A>
<H3>The representation of linearization types</H3>
<P>
Linearization types (<CODE>lincat</CODE>) are not needed when generating with
GFCC, but they have been added to enable parser generation directly from
GFCC. The linearization type definitions are shown as a part of the
concrete syntax, by using terms to represent types. Here is the table
showing how different linearization types are encoded.
</P>
<PRE>
P* = size(P) -- parameter type
{_ : I ; __ : R}* = (I* @ R*) -- record of parameters
{r1 : T1 ; ... ; rn : Tn}* = [T1*,...,Tn*] -- other record
(P =&gt; T)* = [T* ,...,T*] -- size(P) times
Str* = ()
</PRE>
<P>
The category symbols are prefixed with two underscores (<CODE>__</CODE>).
For example, the linearization type <CODE>present/CatEng.NP</CODE> is
translated as follows:
</P>
<PRE>
NP = {
a : { -- 6 = 2*3 values
n : {ParamX.Number} ; -- 2 values
p : {ParamX.Person} -- 3 values
} ;
s : {ResEng.Case} =&gt; Str -- 3 values
}
__NP = [(6@[2,3]),[(),(),()]]
</PRE>
<P></P>
<A NAME="toc14"></A>
<H3>Running the compiler and the GFCC interpreter</H3>
<P>
GFCC generation is a part of the
<A HREF="http://www.cs.chalmers.se/Cs/Research/Language-technology/darcs/GF/doc/darcs.html">developers' version</A>
of GF since September 2006. To invoke the compiler, the flag
<CODE>-printer=gfcc</CODE> to the command
<CODE>pm = print_multi</CODE> is used. It is wise to recompile the grammar from
source, since previously compiled libraries may not obey the canonical
order of records. To <CODE>strip</CODE> the grammar before
GFCC translation removes unnecessary interface references.
Here is an example, performed in
<A HREF="../../../../../examples/bronzeage">example/bronzeage</A>.
</P>
<PRE>
i -src -path=.:prelude:resource-1.0/* -optimize=all_subs BronzeageEng.gf
i -src -path=.:prelude:resource-1.0/* -optimize=all_subs BronzeageGer.gf
strip
pm -printer=gfcc | wf bronze.gfcc
</PRE>
<P></P>
<A NAME="toc15"></A>
<H2>The reference interpreter</H2>
<P>
The reference interpreter written in Haskell consists of the following files:
</P>
<PRE>
-- source file for BNFC
GFCC.cf -- labelled BNF grammar of gfcc
-- files generated by BNFC
AbsGFCC.hs -- abstrac syntax of gfcc
ErrM.hs -- error monad used internally
LexGFCC.hs -- lexer of gfcc files
ParGFCC.hs -- parser of gfcc files and syntax trees
PrintGFCC.hs -- printer of gfcc files and syntax trees
-- hand-written files
DataGFCC.hs -- post-parser grammar creation, linearization and evaluation
GenGFCC.hs -- random and exhaustive generation, generate-and-test parsing
RunGFCC.hs -- main function - a simple command interpreter
</PRE>
<P>
It is included in the
<A HREF="http://www.cs.chalmers.se/Cs/Research/Language-technology/darcs/GF/doc/darcs.html">developers' version</A>
of GF, in the subdirectory <A HREF="../"><CODE>GF/src/GF/Canon/GFCC</CODE></A>.
</P>
<P>
To compile the interpreter, type
</P>
<PRE>
make gfcc
</PRE>
<P>
in <CODE>GF/src</CODE>. To run it, type
</P>
<PRE>
./gfcc &lt;GFCC-file&gt;
</PRE>
<P>
The available commands are
</P>
<UL>
<LI><CODE>gr &lt;Cat&gt; &lt;Int&gt;</CODE>: generate a number of random trees in category.
and show their linearizations in all languages
<LI><CODE>grt &lt;Cat&gt; &lt;Int&gt;</CODE>: generate a number of random trees in category.
and show the trees and their linearizations in all languages
<LI><CODE>gt &lt;Cat&gt; &lt;Int&gt;</CODE>: generate a number of trees in category from smallest,
and show their linearizations in all languages
<LI><CODE>gtt &lt;Cat&gt; &lt;Int&gt;</CODE>: generate a number of trees in category from smallest,
and show the trees and their linearizations in all languages
<LI><CODE>p &lt;Int&gt; &lt;Cat&gt; &lt;String&gt;</CODE>: "parse", i.e. generate trees until match or
until the given number have been generated
<LI><CODE>&lt;Tree&gt;</CODE>: linearize tree in all languages, also showing full records
<LI><CODE>quit</CODE>: terminate the system cleanly
</UL>
<A NAME="toc16"></A>
<H2>Interpreter in C++</H2>
<P>
A base-line interpreter in C++ has been started.
Its main functionality is random generation of trees and linearization of them.
</P>
<P>
Here are some results from running the different interpreters, compared
to running the same grammar in GF, saved in <CODE>.gfcm</CODE> format.
The grammar contains the English, German, and Norwegian
versions of Bronzeage. The experiment was carried out on
Ubuntu Linux laptop with 1.5 GHz Intel centrino processor.
</P>
<TABLE CELLPADDING="4" BORDER="1">
<TR>
<TH></TH>
<TH>GF</TH>
<TH>gfcc(hs)</TH>
<TH>gfcc++</TH>
</TR>
<TR>
<TD>program size</TD>
<TD ALIGN="center">7249k</TD>
<TD ALIGN="center">803k</TD>
<TD ALIGN="right">113k</TD>
</TR>
<TR>
<TD>grammar size</TD>
<TD ALIGN="center">336k</TD>
<TD ALIGN="center">119k</TD>
<TD ALIGN="right">119k</TD>
</TR>
<TR>
<TD>read grammar</TD>
<TD ALIGN="center">1150ms</TD>
<TD ALIGN="center">510ms</TD>
<TD ALIGN="right">100ms</TD>
</TR>
<TR>
<TD>generate 222</TD>
<TD ALIGN="center">9500ms</TD>
<TD ALIGN="center">450ms</TD>
<TD ALIGN="right">800ms</TD>
</TR>
<TR>
<TD>memory</TD>
<TD ALIGN="center">21M</TD>
<TD ALIGN="center">10M</TD>
<TD ALIGN="right">20M</TD>
</TR>
</TABLE>
<P></P>
<P>
To summarize:
</P>
<UL>
<LI>going from GF to gfcc is a major win in both code size and efficiency
<LI>going from Haskell to C++ interpreter is not a win yet, because of a space
leak in the C++ version
</UL>
<A NAME="toc17"></A>
<H2>Some things to do</H2>
<P>
Interpreter in Java.
</P>
<P>
Parsing via MCFG
</P>
<UL>
<LI>the FCFG format can possibly be simplified
<LI>parser grammars should be saved in files to make interpreters easier
</UL>
<P>
Hand-written parsers for GFCC grammars to reduce code size
(and efficiency?) of interpreters.
</P>
<P>
Binary format and/or file compression of GFCC output.
</P>
<P>
Syntax editor based on GFCC.
</P>
<P>
Rewriting of resource libraries in order to exploit the
word-suffix sharing better (depth-one tables, as in FM).
</P>
<!-- html code generated by txt2tags 2.3 (http://txt2tags.sf.net) -->
<!-- cmdline: txt2tags -thtml -\-toc gfcc.txt -->
</BODY></HTML>

View File

@@ -1,656 +0,0 @@
The GFCC Grammar Format
Aarne Ranta
October 19, 2006
Author's address:
[``http://www.cs.chalmers.se/~aarne`` http://www.cs.chalmers.se/~aarne]
% to compile: txt2tags -thtml --toc gfcc.txt
History:
- 19 Oct: translation of lincats, new figures on C++
- 3 Oct 2006: first version
==What is GFCC==
GFCC is a low-level format for GF grammars. Its aim is to contain the minimum
that is needed to process GF grammars at runtime. This minimality has three
advantages:
- compact grammar files and run-time objects
- time and space efficient processing
- simple definition of interpreters
The idea is that all embedded GF applications are compiled to GFCC.
The GF system would be primarily used as a compiler and as a grammar
development tool.
Since GFCC is implemented in BNFC, a parser of the format is readily
available for C, C++, Haskell, Java, and OCaml. Also an XML
representation is generated in BNFC. A
[reference implementation ../]
of linearization and some other functions has been written in Haskell.
==GFCC vs. GFC==
GFCC is aimed to replace GFC as the run-time grammar format. GFC was designed
to be a run-time format, but also to
support separate compilation of grammars, i.e.
to store the results of compiling
individual GF modules. But this means that GFC has to contain extra information,
such as type annotations, which is only needed in compilation and not at
run-time. In particular, the pattern matching syntax and semantics of GFC is
complex and therefore difficult to implement in new platforms.
The main differences of GFCC compared with GFC can be summarized as follows:
- there are no modules, and therefore no qualified names
- a GFCC grammar is multilingual, and consists of a common abstract syntax
together with one concrete syntax per language
- records and tables are replaced by arrays
- record labels and parameter values are replaced by integers
- record projection and table selection are replaced by array indexing
- there is (so far) no support for dependent types or higher-order abstract
syntax (which would be easy to add, but make interpreters much more difficult
to write)
Here is an example of a GF grammar, consisting of three modules,
as translated to GFCC. The representations are aligned, with the exceptions
due to the alphabetical sorting of GFCC grammars.
```
grammar Ex(Eng,Swe);
abstract Ex = { abstract {
cat
S ; NP ; VP ;
fun
Pred : NP -> VP -> S ; Pred : NP,VP -> S = (Pred);
She, They : NP ; She : -> NP = (She);
Sleep : VP ; Sleep : -> VP = (Sleep);
They : -> NP = (They);
} } ;
concrete Eng of Ex = { concrete Eng {
lincat
S = {s : Str} ;
NP = {s : Str ; n : Num} ;
VP = {s : Num => Str} ;
param
Num = Sg | Pl ;
lin
Pred np vp = { Pred = [(($0!1),(($1!0)!($0!0)))];
s = np.s ++ vp.s ! np.n} ;
She = {s = "she" ; n = Sg} ; She = [0, "she"];
They = {s = "they" ; n = Pl} ;
Sleep = {s = table { Sleep = [("sleep" + ["s",""])];
Sg => "sleeps" ;
Pl => "sleep" They = [1, "they"];
} } ;
} ;
}
concrete Swe of Ex = { concrete Swe {
lincat
S = {s : Str} ;
NP = {s : Str} ;
VP = {s : Str} ;
param
Num = Sg | Pl ;
lin
Pred np vp = { Pred = [(($0!0),($1!0))];
s = np.s ++ vp.s} ;
She = {s = "hon"} ; She = ["hon"];
They = {s = "de"} ; They = ["de"];
Sleep = {s = "sover"} ; Sleep = ["sover"];
} } ;
```
==The syntax of GFCC files==
===Top level===
A grammar has a header telling the name of the abstract syntax
(often specifying an application domain), and the names of
the concrete languages. The abstract syntax and the concrete
syntaxes themselves follow.
```
Grammar ::= Header ";" Abstract ";" [Concrete] ;
Header ::= "grammar" CId "(" [CId] ")" ;
Abstract ::= "abstract" "{" [AbsDef] "}" ;
Concrete ::= "concrete" CId "{" [CncDef] "}" ;
```
Abstract syntax judgements give typings and semantic definitions.
Concrete syntax judgements give linearizations.
```
AbsDef ::= CId ":" Type "=" Exp ;
CncDef ::= CId "=" Term ;
```
Also flags are possible, local to each "module" (i.e. abstract and concretes).
```
AbsDef ::= "%" CId "=" String ;
CncDef ::= "%" CId "=" String ;
```
For the run-time system, the reference implementation in Haskell
uses a structure that gives efficient look-up:
```
data GFCC = GFCC {
absname :: CId ,
cncnames :: [CId] ,
abstract :: Abstr ,
concretes :: Map CId Concr
}
data Abstr = Abstr {
funs :: Map CId Type, -- find the type of a fun
cats :: Map CId [CId] -- find the funs giving a cat
}
type Concr = Map CId Term
```
===Abstract syntax===
Types are first-order function types built from
category symbols. Syntax trees (``Exp``) are
rose trees with the head (``Atom``) either a function
constant, a metavariable, or a string, integer, or float
literal.
```
Type ::= [CId] "->" CId ;
Exp ::= "(" Atom [Exp] ")" ;
Atom ::= CId ; -- function constant
Atom ::= "?" ; -- metavariable
Atom ::= String ; -- string literal
Atom ::= Integer ; -- integer literal
Atom ::= Double ; -- float literal
```
===Concrete syntax===
Linearization terms (``Term``) are built as follows.
Constructor names are shown to make the later code
examples readable.
```
R. Term ::= "[" [Term] "]" ; -- array
P. Term ::= "(" Term "!" Term ")" ; -- access to indexed field
S. Term ::= "(" [Term] ")" ; -- sequence with ++
K. Term ::= Tokn ; -- token
V. Term ::= "$" Integer ; -- argument
C. Term ::= Integer ; -- array index
FV. Term ::= "[|" [Term] "|]" ; -- free variation
TM. Term ::= "?" ; -- linearization of metavariable
```
Tokens are strings or (maybe obsolescent) prefix-dependent
variant lists.
```
KS. Tokn ::= String ;
KP. Tokn ::= "[" "pre" [String] "[" [Variant] "]" "]" ;
Var. Variant ::= [String] "/" [String] ;
```
Three special forms of terms are introduced by the compiler
as optimizations. They can in principle be eliminated, but
their presence makes grammars much more compact. Their semantics
will be explained in a later section.
```
F. Term ::= CId ; -- global constant
W. Term ::= "(" String "+" Term ")" ; -- prefix + suffix table
RP. Term ::= "(" Term "@" Term ")"; -- record parameter alias
```
Identifiers are like ``Ident`` in GF and GFC, except that
the compiler produces constants prefixed with ``_`` in
the common subterm elimination optimization.
```
token CId (('_' | letter) (letter | digit | '\'' | '_')*) ;
```
==The semantics of concrete syntax terms==
===Linearization and realization===
The linearization algorithm is essentially the same as in
GFC: a tree is linearized by evaluating its linearization term
in the environment of the linearizations of the subtrees.
Literal atoms are linearized in the obvious way.
The function also needs to know the language (i.e. concrete syntax)
in which linearization is performed.
```
linExp :: GFCC -> CId -> Exp -> Term
linExp mcfg lang tree@(Tr at trees) = case at of
AC fun -> comp (Prelude.map lin trees) $ look fun
AS s -> R [kks (show s)] -- quoted
AI i -> R [kks (show i)]
AF d -> R [kks (show d)]
AM -> TM
where
lin = linExp mcfg lang
comp = compute mcfg lang
look = lookLin mcfg lang
```
The result of linearization is usually a record, which is realized as
a string using the following algorithm.
```
realize :: Term -> String
realize trm = case trm of
R (t:_) -> realize t
S ss -> unwords $ Prelude.map realize ss
K (KS s) -> s
K (KP s _) -> unwords s ---- prefix choice TODO
W s t -> s ++ realize t
FV (t:_) -> realize t
TM -> "?"
```
Since the order of record fields is not necessarily
the same as in GF source,
this realization does not work securely for
categories whose lincats more than one field.
===Term evaluation===
Evaluation follows call-by-value order, with two environments
needed:
- the grammar (a concrete syntax) to give the global constants
- an array of terms to give the subtree linearizations
The code is presented in one-level pattern matching, to
enable reimplementations in languages that do not permit
deep patterns (such as Java and C++).
```
compute :: GFCC -> CId -> [Term] -> Term -> Term
compute mcfg lang args = comp where
comp trm = case trm of
P r p -> proj (comp r) (comp p)
RP i t -> RP (comp i) (comp t)
W s t -> W s (comp t)
R ts -> R $ Prelude.map comp ts
V i -> idx args (fromInteger i) -- already computed
F c -> comp $ look c -- not computed (if contains V)
FV ts -> FV $ Prelude.map comp ts
S ts -> S $ Prelude.filter (/= S []) $ Prelude.map comp ts
_ -> trm
look = lookLin mcfg lang
idx xs i = xs !! i
proj r p = case (r,p) of
(_, FV ts) -> FV $ Prelude.map (proj r) ts
(W s t, _) -> kks (s ++ getString (proj t p))
_ -> comp $ getField r (getIndex p)
getString t = case t of
K (KS s) -> s
_ -> trace ("ERROR in grammar compiler: string from "++ show t) "ERR"
getIndex t = case t of
C i -> fromInteger i
RP p _ -> getIndex p
TM -> 0 -- default value for parameter
_ -> trace ("ERROR in grammar compiler: index from " ++ show t) 0
getField t i = case t of
R rs -> idx rs i
RP _ r -> getField r i
TM -> TM
_ -> trace ("ERROR in grammar compiler: field from " ++ show t) t
```
===The special term constructors===
The three forms introduced by the compiler may a need special
explanation.
Global constants
```
Term ::= CId ;
```
are shorthands for complex terms. They are produced by the
compiler by (iterated) common subexpression elimination.
They are often more powerful than hand-devised code sharing in the source
code. They could be computed off-line by replacing each identifier by
its definition.
Prefix-suffix tables
```
Term ::= "(" String "+" Term ")" ;
```
represent tables of word forms divided to the longest common prefix
and its array of suffixes. In the example grammar above, we have
```
Sleep = [("sleep" + ["s",""])]
```
which in fact is equal to the array of full forms
```
["sleeps", "sleep"]
```
The power of this construction comes from the fact that suffix sets
tend to be repeated in a language, and can therefore be collected
by common subexpression elimination. It is this technique that
explains the used syntax rather than the more accurate
```
"(" String "+" [String] ")"
```
since we want the suffix part to be a ``Term`` for the optimization to
take effect.
The most curious construct of GFCC is the parameter array alias,
```
Term ::= "(" Term "@" Term ")";
```
This form is used as the value of parameter records, such as the type
```
{n : Number ; p : Person}
```
The problem with parameter records is their double role.
They can be used like parameter values, as indices in selection,
```
VP.s ! {n = Sg ; p = P3}
```
but also as records, from which parameters can be projected:
```
{n = Sg ; p = P3}.n
```
Whichever use is selected as primary, a prohibitively complex
case expression must be generated at compilation to GFCC to get the
other use. The adopted
solution is to generate a pair containing both a parameter value index
and an array of indices of record fields. For instance, if we have
```
param Number = Sg | Pl ; Person = P1 | P2 | P3 ;
```
we get the encoding
```
{n = Sg ; p = P3} ---> (2 @ [0,2])
```
The GFCC computation rules are essentially
```
(t ! (i @ _)) = (t ! i)
((_ @ r) ! j) =(r ! j)
```
==Compiling to GFCC==
Compilation to GFCC is performed by the GF grammar compiler, and
GFCC interpreters need not know what it does. For grammar writers,
however, it might be interesting to know what happens to the grammars
in the process.
The compilation phases are the following
+ translate GF source to GFC, as always in GF
+ undo GFC back-end optimizations
+ perform the ``values`` optimization to normalize tables
+ create a symbol table mapping the GFC parameter and record types to
fixed-size arrays, and parameter values and record labels to integers
+ traverse the linearization rules replacing parameters and labels by integers
+ reorganize the created GFC grammar so that it has just one abstract syntax
and one concrete syntax per language
+ apply UTF8 encoding to the grammar, if not yet applied (this is told by the
``coding`` flag)
+ translate the GFC syntax tree to a GFCC syntax tree, using a simple
compositional mapping
+ perform the word-suffix optimization on GFCC linearization terms
+ perform subexpression elimination on each concrete syntax module
+ print out the GFCC code
Notice that a major part of the compilation is done within GFC, so that
GFC-related tasks (such as parser generation) could be performed by
using the old algorithms.
===Problems in GFCC compilation===
Two major problems had to be solved in compiling GFC to GFCC:
- consistent order of tables and records, to permit the array translation
- run-time variables in complex parameter values.
The current implementation is still experimental and may fail
to generate correct code. Any errors remaining are likely to be
related to the two problems just mentioned.
The order problem is solved in different ways for tables and records.
For tables, the ``values`` optimization of GFC already manages to
maintain a canonical order. But this order can be destroyed by the
``share`` optimization. To make sure that GFCC compilation works properly,
it is safest to recompile the GF grammar by using the ``values``
optimization flag.
Records can be canonically ordered by sorting them by labels.
In fact, this was done in connection of the GFCC work as a part
of the GFC generation, to guarantee consistency. This means that
e.g. the ``s`` field will in general no longer appear as the first
field, even if it does so in the GF source code. But relying on the
order of fields in a labelled record would be misplaced anyway.
The canonical form of records is further complicated by lock fields,
i.e. dummy fields of form ``lock_C = <>``, which are added to grammar
libraries to force intensionality of linearization types. The problem
is that the absence of a lock field only generates a warning, not
an error. Therefore a GFC grammar can contain objects of the same
type with and without a lock field. This problem was solved in GFCC
generation by just removing all lock fields (defined as fields whose
type is the empty record type). This has the further advantage of
(slightly) reducing the grammar size. More importantly, it is safe
to remove lock fields, because they are never used in computation,
and because intensional types are only needed in grammars reused
as libraries, not in grammars used at runtime.
While the order problem is rather bureaucratic in nature, run-time
variables are an interesting problem. They arise in the presence
of complex parameter values, created by argument-taking constructors
and parameter records. To give an example, consider the GF parameter
type system
```
Number = Sg | Pl ;
Person = P1 | P2 | P3 ;
Agr = Ag Number Person ;
```
The values can be translated to integers in the expected way,
```
Sg = 0, Pl = 1
P1 = 0, P2 = 1, P3 = 2
Ag Sg P1 = 0, Ag Sg P2 = 1, Ag Sg P3 = 2,
Ag Pl P1 = 3, Ag Pl P2 = 4, Ag Pl P3 = 5
```
However, an argument of ``Agr`` can be a run-time variable, as in
```
Ag np.n P3
```
This expression must first be translated to a case expression,
```
case np.n of {
0 => 2 ;
1 => 5
}
```
which can then be translated to the GFCC term
```
([2,5] ! ($0 ! $1))
```
assuming that the variable ``np`` is the first argument and that its
``Number`` field is the second in the record.
This transformation of course has to be performed recursively, since
there can be several run-time variables in a parameter value:
```
Ag np.n np.p
```
A similar transformation would be possible to deal with the double
role of parameter records discussed above. Thus the type
```
RNP = {n : Number ; p : Person}
```
could be uniformly translated into the set ``{0,1,2,3,4,5}``
as ``Agr`` above. Selections would be simple instances of indexing.
But any projection from the record should be translated into
a case expression,
```
rnp.n ===>
case rnp of {
0 => 0 ;
1 => 0 ;
2 => 0 ;
3 => 1 ;
4 => 1 ;
5 => 1
}
```
To avoid the code bloat resulting from this, we chose the alias representation
which is easy enough to deal with in interpreters.
===The representation of linearization types===
Linearization types (``lincat``) are not needed when generating with
GFCC, but they have been added to enable parser generation directly from
GFCC. The linearization type definitions are shown as a part of the
concrete syntax, by using terms to represent types. Here is the table
showing how different linearization types are encoded.
```
P* = size(P) -- parameter type
{_ : I ; __ : R}* = (I* @ R*) -- record of parameters
{r1 : T1 ; ... ; rn : Tn}* = [T1*,...,Tn*] -- other record
(P => T)* = [T* ,...,T*] -- size(P) times
Str* = ()
```
The category symbols are prefixed with two underscores (``__``).
For example, the linearization type ``present/CatEng.NP`` is
translated as follows:
```
NP = {
a : { -- 6 = 2*3 values
n : {ParamX.Number} ; -- 2 values
p : {ParamX.Person} -- 3 values
} ;
s : {ResEng.Case} => Str -- 3 values
}
__NP = [(6@[2,3]),[(),(),()]]
```
===Running the compiler and the GFCC interpreter===
GFCC generation is a part of the
[developers' version http://www.cs.chalmers.se/Cs/Research/Language-technology/darcs/GF/doc/darcs.html]
of GF since September 2006. To invoke the compiler, the flag
``-printer=gfcc`` to the command
``pm = print_multi`` is used. It is wise to recompile the grammar from
source, since previously compiled libraries may not obey the canonical
order of records. To ``strip`` the grammar before
GFCC translation removes unnecessary interface references.
Here is an example, performed in
[example/bronzeage ../../../../../examples/bronzeage].
```
i -src -path=.:prelude:resource-1.0/* -optimize=all_subs BronzeageEng.gf
i -src -path=.:prelude:resource-1.0/* -optimize=all_subs BronzeageGer.gf
strip
pm -printer=gfcc | wf bronze.gfcc
```
==The reference interpreter==
The reference interpreter written in Haskell consists of the following files:
```
-- source file for BNFC
GFCC.cf -- labelled BNF grammar of gfcc
-- files generated by BNFC
AbsGFCC.hs -- abstrac syntax of gfcc
ErrM.hs -- error monad used internally
LexGFCC.hs -- lexer of gfcc files
ParGFCC.hs -- parser of gfcc files and syntax trees
PrintGFCC.hs -- printer of gfcc files and syntax trees
-- hand-written files
DataGFCC.hs -- post-parser grammar creation, linearization and evaluation
GenGFCC.hs -- random and exhaustive generation, generate-and-test parsing
RunGFCC.hs -- main function - a simple command interpreter
```
It is included in the
[developers' version http://www.cs.chalmers.se/Cs/Research/Language-technology/darcs/GF/doc/darcs.html]
of GF, in the subdirectory [``GF/src/GF/Canon/GFCC`` ../].
To compile the interpreter, type
```
make gfcc
```
in ``GF/src``. To run it, type
```
./gfcc <GFCC-file>
```
The available commands are
- ``gr <Cat> <Int>``: generate a number of random trees in category.
and show their linearizations in all languages
- ``grt <Cat> <Int>``: generate a number of random trees in category.
and show the trees and their linearizations in all languages
- ``gt <Cat> <Int>``: generate a number of trees in category from smallest,
and show their linearizations in all languages
- ``gtt <Cat> <Int>``: generate a number of trees in category from smallest,
and show the trees and their linearizations in all languages
- ``p <Int> <Cat> <String>``: "parse", i.e. generate trees until match or
until the given number have been generated
- ``<Tree>``: linearize tree in all languages, also showing full records
- ``quit``: terminate the system cleanly
==Interpreter in C++==
A base-line interpreter in C++ has been started.
Its main functionality is random generation of trees and linearization of them.
Here are some results from running the different interpreters, compared
to running the same grammar in GF, saved in ``.gfcm`` format.
The grammar contains the English, German, and Norwegian
versions of Bronzeage. The experiment was carried out on
Ubuntu Linux laptop with 1.5 GHz Intel centrino processor.
|| | GF | gfcc(hs) | gfcc++ |
| program size | 7249k | 803k | 113k
| grammar size | 336k | 119k | 119k
| read grammar | 1150ms | 510ms | 100ms
| generate 222 | 9500ms | 450ms | 800ms
| memory | 21M | 10M | 20M
To summarize:
- going from GF to gfcc is a major win in both code size and efficiency
- going from Haskell to C++ interpreter is not a win yet, because of a space
leak in the C++ version
==Some things to do==
Interpreter in Java.
Parsing via MCFG
- the FCFG format can possibly be simplified
- parser grammars should be saved in files to make interpreters easier
Hand-written parsers for GFCC grammars to reduce code size
(and efficiency?) of interpreters.
Binary format and/or file compression of GFCC output.
Syntax editor based on GFCC.
Rewriting of resource libraries in order to exploit the
word-suffix sharing better (depth-one tables, as in FM).