diff --git a/src/GF/Devel/GrammarToGFCC.hs b/src/GF/Devel/GrammarToGFCC.hs index a61a2ec49..b7eaebe31 100644 --- a/src/GF/Devel/GrammarToGFCC.hs +++ b/src/GF/Devel/GrammarToGFCC.hs @@ -307,8 +307,8 @@ type ParamEnv = paramValues :: SourceGrammar -> ParamEnv paramValues cgr = (labels,untyps,typs) where partyps = nub $ - [App (Q (IC "Predef") (IC "Ints")) (EInt i) | i <- [1,9]] ---linTypeInt - ++ [ty | + --- [App (Q (IC "Predef") (IC "Ints")) (EInt i) | i <- [1,9]] ---linTypeInt + [ty | (_,(_,CncCat (Yes (RecType ls)) _ _)) <- jments, ty0 <- [ty | (_, ty) <- unlockTyp ls], ty <- typsFrom ty0 diff --git a/src/GF/GFCC/Linearize.hs b/src/GF/GFCC/Linearize.hs index d087384bf..7d5e6b010 100644 --- a/src/GF/GFCC/Linearize.hs +++ b/src/GF/GFCC/Linearize.hs @@ -31,8 +31,9 @@ linExp mcfg lang tree@(DTr _ at trees) = ---- bindings TODO case at of AC fun -> comp (lmap lin trees) $ look fun AS s -> R [kks (show s)] -- quoted - AI i -> R [C lst, kks (show i), C size] where - lst = mod (fromInteger i) 10 ; size = if i < 10 then 0 else 1 + AI i -> R [kks (show i)] + --- [C lst, kks (show i), C size] where + --- lst = mod (fromInteger i) 10 ; size = if i < 10 then 0 else 1 AF d -> R [kks (show d)] AM _ -> TM where diff --git a/src/GF/GFCC/Raw/ConvertGFCC.hs b/src/GF/GFCC/Raw/ConvertGFCC.hs index fbed72e69..3bfed9c83 100644 --- a/src/GF/GFCC/Raw/ConvertGFCC.hs +++ b/src/GF/GFCC/Raw/ConvertGFCC.hs @@ -73,7 +73,7 @@ toExp e = case e of App fun [App (CId "B") xs, App (CId "X") exps] -> DTr [x | AId x <- xs] (AC fun) (lmap toExp exps) App (CId "Eq") eqs -> - EEq [Equ (lmap toExp ps) (toExp v) | App (CId "Case") (v:ps) <- eqs] + EEq [Equ (lmap toExp ps) (toExp v) | App (CId "E") (v:ps) <- eqs] AMet -> DTr [] (AM 0) [] AInt i -> DTr [] (AI i) [] AFlt i -> DTr [] (AF i) [] @@ -147,7 +147,7 @@ fromExp e = case e of DTr [] (AI i) [] -> AInt (toInteger i) DTr [] (AM _) [] -> AMet ---- EEq eqs -> - App (CId "Eq") [App (CId "Case") (lmap fromExp (v:ps)) | Equ ps v <- eqs] + App (CId "Eq") [App 
(CId "E") (lmap fromExp (v:ps)) | Equ ps v <- eqs] _ -> error $ "exp " ++ show e fromTerm :: Term -> RExp diff --git a/src/GF/GFCC/doc/gfcc.txt b/src/GF/GFCC/doc/gfcc.txt index 6a78a62f6..5dcf2fbdc 100644 --- a/src/GF/GFCC/doc/gfcc.txt +++ b/src/GF/GFCC/doc/gfcc.txt @@ -1,6 +1,6 @@ The GFCC Grammar Format Aarne Ranta -October 5, 2007 +December 14, 2007 Author's address: [``http://www.cs.chalmers.se/~aarne`` http://www.cs.chalmers.se/~aarne] @@ -8,6 +8,7 @@ Author's address: % to compile: txt2tags -thtml --toc gfcc.txt History: +- 14 Dec 2007: simpler, Lisp-like concrete syntax of GFCC - 5 Oct 2007: new, better structured GFCC with full expressive power - 19 Oct: translation of lincats, new figures on C++ - 3 Oct 2006: first version @@ -53,7 +54,8 @@ will be used instead. GFC provides only marginal advantages as a target format compared with GF, and it is therefore just extra weight to carry around this format. -The main differences of GFCC compared with GFC (and GF) can be summarized as follows: +The main differences of GFCC compared with GFC (and GF) can be +summarized as follows: - there are no modules, and therefore no qualified names - a GFCC grammar is multilingual, and consists of a common abstract syntax together with one concrete syntax per language @@ -66,7 +68,8 @@ The main differences of GFCC compared with GFC (and GF) can be summarized as fol Here is an example of a GF grammar, consisting of three modules, -as translated to GFCC. The representations are aligned; thus they do not completely +as translated to GFCC. The representations are aligned; +thus they do not completely reflect the order of judgements in GFCC files, which have different orders of blocks of judgements, and alphabetical sorting. 
``` diff --git a/src/GF/GFCC/doc/syntax.txt b/src/GF/GFCC/doc/syntax.txt new file mode 100644 index 000000000..6bb3b8d97 --- /dev/null +++ b/src/GF/GFCC/doc/syntax.txt @@ -0,0 +1,182 @@ +GFCC Syntax + + +==Syntax of GFCC files== + +The parser syntax is very simple, as defined in BNF: +``` + Grm. Grammar ::= [RExp] ; + + App. RExp ::= "(" CId [RExp] ")" ; + AId. RExp ::= CId ; + AInt. RExp ::= Integer ; + AStr. RExp ::= String ; + AFlt. RExp ::= Double ; + AMet. RExp ::= "?" ; + + terminator RExp "" ; + + token CId (('_' | letter) (letter | digit | '\'' | '_')*) ; +``` +While a parser and a printer can be generated for many languages +from this grammar by using the BNF Converter, a parser is also +easy to write by hand using recursive descent. + + +==Syntax of well-formed GFCC code== + +Here is a summary of well-formed syntax, +with a comment on the semantics of each construction. +``` + Grammar ::= + CId -- abstract syntax name + "(" "concrete" CId* ")" -- concrete syntax names + "(" "flags" Flag* ")" -- global flags + "(" "abstract" Abstract ")" -- abstract syntax + "(" "concrete" Concrete* ")" -- concrete syntaxes + + Abstract ::= + "(" "flags" Flag* ")" -- abstract flags + "(" "fun" FunDef* ")" -- function definitions + "(" "cat" CatDef* ")" -- category definitions + + Concrete ::= + "(" CId -- language name + "flags" Flag* -- concrete flags + "lin" LinDef* -- linearization rules + "oper" LinDef* -- operations (macros) + "lincat" LinDef* -- linearization type definitions + "lindef" LinDef* -- linearization default definitions + "printname" LinDef* -- printname definitions + "param" LinDef* -- lincats with labels and parameter value names + ")" + + Flag ::= "(" CId String ")" -- flag and value + FunDef ::= "(" CId Type Exp ")" -- function, type, and definition + CatDef ::= "(" CId Hypo* ")" -- category and context + LinDef ::= "(" CId Term ")" -- function and definition + + Type ::= + "(" CId -- value category + "(" "H" Hypo* ")" -- argument context + "(" "X" 
Exp* ")" ")" -- arguments (of dependent value type) + + Exp ::= + "(" CId -- function + "(" "B" CId* ")" -- bindings + "(" "X" Exp* ")" ")" -- arguments + | CId -- variable + | "?" -- metavariable + | "(" "Eq" Equation* ")" -- group of pattern equations + | Integer -- integer literal (non-negative) + | Float -- floating-point literal (non-negative) + | String -- string literal (in double quotes) + + Hypo ::= "(" CId Type ")" -- variable and type + + Equation ::= "(" "E" Exp Exp* ")" -- value and pattern list + + Term ::= + "(" "R" Term* ")" -- array (record or table) + | "(" "S" Term* ")" -- concatenated sequence + | "(" "FV" Term* ")" -- free variant list + | "(" "P" Term Term ")" -- access to index (projection or selection) + | "(" "W" String Term ")" -- token prefix with suffix list + | "(" "A" Integer ")" -- pointer to subtree + | String -- token (in double quotes) + | Integer -- index in array + | CId -- macro constant + | "?" -- metavariable +``` + + +==GFCC interpreter== + +The first phase in interpreting GFCC is to parse a GFCC file and +build an internal abstract syntax representation, as specified +in the previous section. + +With this representation, linearization can be performed by +a straightforward function from expressions (``Exp``) to terms +(``Term``). All expressions except groups of pattern equations +can be linearized. + +Here is a reference Haskell implementation of linearization: +``` + linExp :: GFCC -> CId -> Exp -> Term + linExp gfcc lang tree@(DTr _ at trees) = case at of + AC fun -> comp (map lin trees) $ look fun + AS s -> R [K (show s)] -- quoted + AI i -> R [K (show i)] + AF d -> R [K (show d)] + AM _ -> TM + where + lin = linExp gfcc lang + comp = compute gfcc lang + look = lookLin gfcc lang +``` +TODO: bindings must be supported. 
+ +Terms resulting from linearization are evaluated in +call-by-value order, with two environments needed: +- the grammar (a concrete syntax) to give the global constants +- an array of terms to give the subtree linearizations + + +The Haskell implementation works as follows: +``` +compute :: GFCC -> CId -> [Term] -> Term -> Term +compute gfcc lang args = comp where + comp trm = case trm of + P r p -> proj (comp r) (comp p) + W s t -> W s (comp t) + R ts -> R $ map comp ts + V i -> idx args (fromInteger i) -- already computed + F c -> comp $ look c -- not computed (if contains V) + FV ts -> FV $ Prelude.map comp ts + S ts -> S $ Prelude.filter (/= S []) $ Prelude.map comp ts + _ -> trm + + look = lookOper gfcc lang + + idx xs i = xs !! i + + proj r p = case (r,p) of + (_, FV ts) -> FV $ Prelude.map (proj r) ts + (FV ts, _ ) -> FV $ Prelude.map (\t -> proj t p) ts + (W s t, _) -> kks (s ++ getString (proj t p)) + _ -> comp $ getField r (getIndex p) + + getString t = case t of + K (KS s) -> s + _ -> trace ("ERROR in grammar compiler: string from "++ show t) "ERR" + + getIndex t = case t of + C i -> fromInteger i + RP p _ -> getIndex p + TM -> 0 -- default value for parameter + _ -> trace ("ERROR in grammar compiler: index from " ++ show t) 0 + + getField t i = case t of + R rs -> idx rs i + RP _ r -> getField r i + TM -> TM + _ -> trace ("ERROR in grammar compiler: field from " ++ show t) t +``` +The result of linearization is usually a record, which is realized as +a string using the following algorithm. +``` + realize :: Term -> String + realize trm = case trm of + R (t:_) -> realize t + S ss -> unwords $ map realize ss + K s -> s + W s t -> s ++ realize t + FV (t:_) -> realize t -- TODO: all variants + TM -> "?" +``` +Notice that realization always picks the first field of a record. +If a linearization type has more than one field, the first field +does not necessarily contain the desired string. 
+Also notice that the order of record fields in GFCC is not necessarily +the same as in GF source. diff --git a/src/GF/Grammar/Lookup.hs b/src/GF/Grammar/Lookup.hs index 481512751..81a62decf 100644 --- a/src/GF/Grammar/Lookup.hs +++ b/src/GF/Grammar/Lookup.hs @@ -231,10 +231,10 @@ lookupAbsDef gr m c = errIn ("looking up absdef of" +++ prt c) $ do _ -> Bad $ prt m +++ "is not an abstract module" linTypeInt :: Type -linTypeInt = - let ints k = App (Q (IC "Predef") (IC "Ints")) (EInt k) in - RecType [ - (LIdent "last",ints 9),(LIdent "s", typeStr), (LIdent "size",ints 1)] +linTypeInt = defLinType +--- let ints k = App (Q (IC "Predef") (IC "Ints")) (EInt k) in +--- RecType [ +--- (LIdent "last",ints 9),(LIdent "s", typeStr), (LIdent "size",ints 1)] lookupLincat :: SourceGrammar -> Ident -> Ident -> Err Type lookupLincat gr m c | elem c [zIdent "Int"] = return linTypeInt