forked from GitHub/gf-core
took back smart type of Int ; Digits type in resource and some adjustments of Det syntax (not yet for romance and russian)
This commit is contained in:
@@ -307,8 +307,8 @@ type ParamEnv =
|
||||
paramValues :: SourceGrammar -> ParamEnv
|
||||
paramValues cgr = (labels,untyps,typs) where
|
||||
partyps = nub $
|
||||
[App (Q (IC "Predef") (IC "Ints")) (EInt i) | i <- [1,9]] ---linTypeInt
|
||||
++ [ty |
|
||||
--- [App (Q (IC "Predef") (IC "Ints")) (EInt i) | i <- [1,9]] ---linTypeInt
|
||||
[ty |
|
||||
(_,(_,CncCat (Yes (RecType ls)) _ _)) <- jments,
|
||||
ty0 <- [ty | (_, ty) <- unlockTyp ls],
|
||||
ty <- typsFrom ty0
|
||||
|
||||
@@ -31,8 +31,9 @@ linExp mcfg lang tree@(DTr _ at trees) = ---- bindings TODO
|
||||
case at of
|
||||
AC fun -> comp (lmap lin trees) $ look fun
|
||||
AS s -> R [kks (show s)] -- quoted
|
||||
AI i -> R [C lst, kks (show i), C size] where
|
||||
lst = mod (fromInteger i) 10 ; size = if i < 10 then 0 else 1
|
||||
AI i -> R [kks (show i)]
|
||||
--- [C lst, kks (show i), C size] where
|
||||
--- lst = mod (fromInteger i) 10 ; size = if i < 10 then 0 else 1
|
||||
AF d -> R [kks (show d)]
|
||||
AM _ -> TM
|
||||
where
|
||||
|
||||
@@ -73,7 +73,7 @@ toExp e = case e of
|
||||
App fun [App (CId "B") xs, App (CId "X") exps] ->
|
||||
DTr [x | AId x <- xs] (AC fun) (lmap toExp exps)
|
||||
App (CId "Eq") eqs ->
|
||||
EEq [Equ (lmap toExp ps) (toExp v) | App (CId "Case") (v:ps) <- eqs]
|
||||
EEq [Equ (lmap toExp ps) (toExp v) | App (CId "E") (v:ps) <- eqs]
|
||||
AMet -> DTr [] (AM 0) []
|
||||
AInt i -> DTr [] (AI i) []
|
||||
AFlt i -> DTr [] (AF i) []
|
||||
@@ -147,7 +147,7 @@ fromExp e = case e of
|
||||
DTr [] (AI i) [] -> AInt (toInteger i)
|
||||
DTr [] (AM _) [] -> AMet ----
|
||||
EEq eqs ->
|
||||
App (CId "Eq") [App (CId "Case") (lmap fromExp (v:ps)) | Equ ps v <- eqs]
|
||||
App (CId "Eq") [App (CId "E") (lmap fromExp (v:ps)) | Equ ps v <- eqs]
|
||||
_ -> error $ "exp " ++ show e
|
||||
|
||||
fromTerm :: Term -> RExp
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
The GFCC Grammar Format
|
||||
Aarne Ranta
|
||||
October 5, 2007
|
||||
December 14, 2007
|
||||
|
||||
Author's address:
|
||||
[``http://www.cs.chalmers.se/~aarne`` http://www.cs.chalmers.se/~aarne]
|
||||
@@ -8,6 +8,7 @@ Author's address:
|
||||
% to compile: txt2tags -thtml --toc gfcc.txt
|
||||
|
||||
History:
|
||||
- 14 Dec 2007: simpler, Lisp-like concrete syntax of GFCC
|
||||
- 5 Oct 2007: new, better structured GFCC with full expressive power
|
||||
- 19 Oct 2006: translation of lincats, new figures on C++
|
||||
- 3 Oct 2006: first version
|
||||
@@ -53,7 +54,8 @@ will be used instead. GFC provides only marginal advantages as a target format
|
||||
compared with GF, and it is therefore just extra weight to carry around this
|
||||
format.
|
||||
|
||||
The main differences of GFCC compared with GFC (and GF) can be summarized as follows:
|
||||
The main differences of GFCC compared with GFC (and GF) can be
|
||||
summarized as follows:
|
||||
- there are no modules, and therefore no qualified names
|
||||
- a GFCC grammar is multilingual, and consists of a common abstract syntax
|
||||
together with one concrete syntax per language
|
||||
@@ -66,7 +68,8 @@ The main differences of GFCC compared with GFC (and GF) can be summarized as fol
|
||||
|
||||
|
||||
Here is an example of a GF grammar, consisting of three modules,
|
||||
as translated to GFCC. The representations are aligned; thus they do not completely
|
||||
as translated to GFCC. The representations are aligned;
|
||||
thus they do not completely
|
||||
reflect the order of judgements in GFCC files, which have different orders of
|
||||
blocks of judgements, and alphabetical sorting.
|
||||
```
|
||||
|
||||
182
src/GF/GFCC/doc/syntax.txt
Normal file
182
src/GF/GFCC/doc/syntax.txt
Normal file
@@ -0,0 +1,182 @@
|
||||
GFCC Syntax
|
||||
|
||||
|
||||
==Syntax of GFCC files==
|
||||
|
||||
The parser syntax is very simple, as defined in BNF:
|
||||
```
|
||||
Grm. Grammar ::= [RExp] ;
|
||||
|
||||
App. RExp ::= "(" CId [RExp] ")" ;
|
||||
AId. RExp ::= CId ;
|
||||
AInt. RExp ::= Integer ;
|
||||
AStr. RExp ::= String ;
|
||||
AFlt. RExp ::= Double ;
|
||||
AMet. RExp ::= "?" ;
|
||||
|
||||
terminator RExp "" ;
|
||||
|
||||
token CId (('_' | letter) (letter | digit | '\'' | '_')*) ;
|
||||
```
|
||||
While a parser and a printer can be generated for many languages
|
||||
from this grammar by using the BNF Converter, a parser is also
|
||||
easy to write by hand using recursive descent.
|
||||
|
||||
|
||||
==Syntax of well-formed GFCC code==
|
||||
|
||||
Here is a summary of well-formed syntax,
|
||||
with a comment on the semantics of each construction.
|
||||
```
|
||||
Grammar ::=
|
||||
CId -- abstract syntax name
|
||||
"(" "concrete" CId* ")" -- concrete syntax names
|
||||
"(" "flags" Flag* ")" -- global flags
|
||||
"(" "abstract" Abstract ")" -- abstract syntax
|
||||
"(" "concrete" Concrete* ")" -- concrete syntaxes
|
||||
|
||||
Abstract ::=
|
||||
"(" "flags" Flag* ")" -- abstract flags
|
||||
"(" "fun" FunDef* ")" -- function definitions
|
||||
"(" "cat" CatDef* ")" -- category definitions
|
||||
|
||||
Concrete ::=
|
||||
"(" CId -- language name
|
||||
"flags" Flag* -- concrete flags
|
||||
"lin" LinDef* -- linearization rules
|
||||
"oper" LinDef* -- operations (macros)
|
||||
"lincat" LinDef* -- linearization type definitions
|
||||
"lindef" LinDef* -- linearization default definitions
|
||||
"printname" LinDef* -- printname definitions
|
||||
"param" LinDef* -- lincats with labels and parameter value names
|
||||
")"
|
||||
|
||||
Flag ::= "(" CId String ")" -- flag and value
|
||||
FunDef ::= "(" CId Type Exp ")" -- function, type, and definition
|
||||
CatDef ::= "(" CId Hypo* ")" -- category and context
|
||||
LinDef ::= "(" CId Term ")" -- function and definition
|
||||
|
||||
Type ::=
|
||||
"(" CId -- value category
|
||||
"(" "H" Hypo* ")" -- argument context
|
||||
"(" "X" Exp* ")" ")" -- arguments (of dependent value type)
|
||||
|
||||
Exp ::=
|
||||
"(" CId -- function
|
||||
"(" "B" CId* ")" -- bindings
|
||||
"(" "X" Exp* ")" ")" -- arguments
|
||||
| CId -- variable
|
||||
| "?" -- metavariable
|
||||
| "(" "Eq" Equation* ")" -- group of pattern equations
|
||||
| Integer -- integer literal (non-negative)
|
||||
| Float -- floating-point literal (non-negative)
|
||||
| String -- string literal (in double quotes)
|
||||
|
||||
Hypo ::= "(" CId Type ")" -- variable and type
|
||||
|
||||
Equation ::= "(" "E" Exp Exp* ")" -- value and pattern list
|
||||
|
||||
Term ::=
|
||||
"(" "R" Term* ")" -- array (record or table)
|
||||
| "(" "S" Term* ")" -- concatenated sequence
|
||||
| "(" "FV" Term* ")" -- free variant list
|
||||
| "(" "P" Term Term ")" -- access to index (projection or selection)
|
||||
| "(" "W" String Term ")" -- token prefix with suffix list
|
||||
| "(" "A" Integer ")" -- pointer to subtree
|
||||
| String -- token (in double quotes)
|
||||
| Integer -- index in array
|
||||
| CId -- macro constant
|
||||
| "?" -- metavariable
|
||||
```
|
||||
|
||||
|
||||
==GFCC interpreter==
|
||||
|
||||
The first phase in interpreting GFCC is to parse a GFCC file and
|
||||
build an internal abstract syntax representation, as specified
|
||||
in the previous section.
|
||||
|
||||
With this representation, linearization can be performed by
|
||||
a straightforward function from expressions (``Exp``) to terms
|
||||
(``Term``). All expressions except groups of pattern equations
|
||||
can be linearized.
|
||||
|
||||
Here is a reference Haskell implementation of linearization:
|
||||
```
|
||||
-- | Linearize an expression tree in the concrete syntax @lang@ of @gfcc@.
--
-- The atom at the root of the tree decides the result:
--
-- * function constant: its linearization rule (found with @lookLin@) is
--   evaluated by @compute@, with the linearized subtrees as arguments;
-- * string, integer or float literal: a one-field record holding the
--   'show'n literal as a token (so strings keep their quotes);
-- * metavariable: the metavariable term @TM@.
--
-- The variable bindings stored in the first field of @DTr@ are ignored.
linExp :: GFCC -> CId -> Exp -> Term
linExp gfcc lang (DTr _ at trees) = case at of
  AC fun -> comp (map lin trees) $ look fun
  AS s   -> R [K (show s)]  -- quoted
  AI i   -> R [K (show i)]
  AF d   -> R [K (show d)]
  AM _   -> TM              -- AM carries an argument; it plays no role here
 where
  lin  = linExp gfcc lang
  comp = compute gfcc lang
  look = lookLin gfcc lang
|
||||
```
|
||||
TODO: variable bindings are not yet supported by this implementation.
|
||||
|
||||
Terms resulting from linearization are evaluated in
|
||||
call-by-value order, with two environments needed:
|
||||
- the grammar (a concrete syntax) to give the global constants
|
||||
- an array of terms to give the subtree linearizations
|
||||
|
||||
|
||||
The Haskell implementation works as follows:
|
||||
```
|
||||
-- | Evaluate a linearization term in the concrete syntax @lang@ of @gfcc@.
--
-- @args@ holds the linearizations of the subtrees; a @V i@ term is replaced
-- by the @i@-th element of that list.
compute :: GFCC -> CId -> [Term] -> Term -> Term
compute gfcc lang args = comp
 where
  -- Evaluate one term; constructors not listed here are returned unchanged.
  comp (P tbl sel) = proj (comp tbl) (comp sel)
  comp (W pre suf) = W pre (comp suf)
  comp (R fields)  = R (map comp fields)
  comp (V n)       = idx args (fromInteger n)  -- already computed
  comp (F c)       = comp (look c)             -- not computed (if contains V)
  comp (FV alts)   = FV (map comp alts)
  comp (S parts)   = S [v | t <- parts, let v = comp t, v /= S []]
  comp other       = other

  -- Operation (macro) definitions of the current concrete syntax.
  look = lookOper gfcc lang

  idx xs i = xs !! i

  -- Projection/selection; free variation on either side is distributed
  -- over the alternatives, the selector side being inspected first.
  proj tbl (FV sels) = FV (map (proj tbl) sels)
  proj (FV tbls) sel = FV (map (\t -> proj t sel) tbls)
  proj (W pre suf) sel = kks (pre ++ getString (proj suf sel))
  proj tbl sel = comp (getField tbl (getIndex sel))

  -- The string carried by a plain token.
  getString (K (KS s)) = s
  getString t =
    trace ("ERROR in grammar compiler: string from "++ show t) "ERR"

  -- The numeric index denoted by a selector term.
  getIndex (C n)    = fromInteger n
  getIndex (RP p _) = getIndex p
  getIndex TM       = 0  -- default value for parameter
  getIndex t =
    trace ("ERROR in grammar compiler: index from " ++ show t) 0

  -- The i-th component of an array-like term.
  getField (R rs) i   = idx rs i
  getField (RP _ r) i = getField r i
  getField TM _       = TM
  getField t _ =
    trace ("ERROR in grammar compiler: field from " ++ show t) t
|
||||
```
|
||||
The result of linearization is usually a record, which is realized as
|
||||
a string using the following algorithm.
|
||||
```
|
||||
-- | Turn a linearized term into a string.
--
-- Only the first field of a record and the first of a list of free
-- variants are used; a metavariable is rendered as @"?"@.
realize :: Term -> String
realize term = case term of
  R (field:_) -> realize field
  S parts     -> unwords [realize part | part <- parts]
  K tok       -> tok
  W pre suf   -> pre ++ realize suf
  FV (alt:_)  -> realize alt  -- TODO: all variants
  TM          -> "?"
|
||||
```
|
||||
Notice that realization always picks the first field of a record.
|
||||
If a linearization type has more than one field, the first field
|
||||
does not necessarily contain the desired string.
|
||||
Also notice that the order of record fields in GFCC is not necessarily
|
||||
the same as in GF source.
|
||||
@@ -231,10 +231,10 @@ lookupAbsDef gr m c = errIn ("looking up absdef of" +++ prt c) $ do
|
||||
_ -> Bad $ prt m +++ "is not an abstract module"
|
||||
|
||||
linTypeInt :: Type
|
||||
linTypeInt =
|
||||
let ints k = App (Q (IC "Predef") (IC "Ints")) (EInt k) in
|
||||
RecType [
|
||||
(LIdent "last",ints 9),(LIdent "s", typeStr), (LIdent "size",ints 1)]
|
||||
linTypeInt = defLinType
|
||||
--- let ints k = App (Q (IC "Predef") (IC "Ints")) (EInt k) in
|
||||
--- RecType [
|
||||
--- (LIdent "last",ints 9),(LIdent "s", typeStr), (LIdent "size",ints 1)]
|
||||
|
||||
lookupLincat :: SourceGrammar -> Ident -> Ident -> Err Type
|
||||
lookupLincat gr m c | elem c [zIdent "Int"] = return linTypeInt
|
||||
|
||||
Reference in New Issue
Block a user