1
0
forked from GitHub/gf-core

took back smart type of Int ; Digits type in resource and some adjustments of Det syntax (not yet for romance and russian)

This commit is contained in:
aarne
2007-12-17 18:12:46 +00:00
parent 7551c70db6
commit 27602f4f82
6 changed files with 199 additions and 13 deletions

View File

@@ -307,8 +307,8 @@ type ParamEnv =
paramValues :: SourceGrammar -> ParamEnv
paramValues cgr = (labels,untyps,typs) where
partyps = nub $
[App (Q (IC "Predef") (IC "Ints")) (EInt i) | i <- [1,9]] ---linTypeInt
++ [ty |
--- [App (Q (IC "Predef") (IC "Ints")) (EInt i) | i <- [1,9]] ---linTypeInt
[ty |
(_,(_,CncCat (Yes (RecType ls)) _ _)) <- jments,
ty0 <- [ty | (_, ty) <- unlockTyp ls],
ty <- typsFrom ty0

View File

@@ -31,8 +31,9 @@ linExp mcfg lang tree@(DTr _ at trees) = ---- bindings TODO
case at of
AC fun -> comp (lmap lin trees) $ look fun
AS s -> R [kks (show s)] -- quoted
AI i -> R [C lst, kks (show i), C size] where
lst = mod (fromInteger i) 10 ; size = if i < 10 then 0 else 1
AI i -> R [kks (show i)]
--- [C lst, kks (show i), C size] where
--- lst = mod (fromInteger i) 10 ; size = if i < 10 then 0 else 1
AF d -> R [kks (show d)]
AM _ -> TM
where

View File

@@ -73,7 +73,7 @@ toExp e = case e of
App fun [App (CId "B") xs, App (CId "X") exps] ->
DTr [x | AId x <- xs] (AC fun) (lmap toExp exps)
App (CId "Eq") eqs ->
EEq [Equ (lmap toExp ps) (toExp v) | App (CId "Case") (v:ps) <- eqs]
EEq [Equ (lmap toExp ps) (toExp v) | App (CId "E") (v:ps) <- eqs]
AMet -> DTr [] (AM 0) []
AInt i -> DTr [] (AI i) []
AFlt i -> DTr [] (AF i) []
@@ -147,7 +147,7 @@ fromExp e = case e of
DTr [] (AI i) [] -> AInt (toInteger i)
DTr [] (AM _) [] -> AMet ----
EEq eqs ->
App (CId "Eq") [App (CId "Case") (lmap fromExp (v:ps)) | Equ ps v <- eqs]
App (CId "Eq") [App (CId "E") (lmap fromExp (v:ps)) | Equ ps v <- eqs]
_ -> error $ "exp " ++ show e
fromTerm :: Term -> RExp

View File

@@ -1,6 +1,6 @@
The GFCC Grammar Format
Aarne Ranta
October 5, 2007
December 14, 2007
Author's address:
[``http://www.cs.chalmers.se/~aarne`` http://www.cs.chalmers.se/~aarne]
@@ -8,6 +8,7 @@ Author's address:
% to compile: txt2tags -thtml --toc gfcc.txt
History:
- 14 Dec 2007: simpler, Lisp-like concrete syntax of GFCC
- 5 Oct 2007: new, better structured GFCC with full expressive power
- 19 Oct: translation of lincats, new figures on C++
- 3 Oct 2006: first version
@@ -53,7 +54,8 @@ will be used instead. GFC provides only marginal advantages as a target format
compared with GF, and it is therefore just extra weight to carry around this
format.
The main differences of GFCC compared with GFC (and GF) can be summarized as follows:
The main differences of GFCC compared with GFC (and GF) can be
summarized as follows:
- there are no modules, and therefore no qualified names
- a GFCC grammar is multilingual, and consists of a common abstract syntax
together with one concrete syntax per language
@@ -66,7 +68,8 @@ The main differences of GFCC compared with GFC (and GF) can be summarized as fol
Here is an example of a GF grammar, consisting of three modules,
as translated to GFCC. The representations are aligned; thus they do not completely
as translated to GFCC. The representations are aligned;
thus they do not completely
reflect the order of judgements in GFCC files, which have different orders of
blocks of judgements, and alphabetical sorting.
```

182
src/GF/GFCC/doc/syntax.txt Normal file
View File

@@ -0,0 +1,182 @@
GFCC Syntax
==Syntax of GFCC files==
The parser syntax is very simple, as defined in BNF:
```
Grm. Grammar ::= [RExp] ;
App. RExp ::= "(" CId [RExp] ")" ;
AId. RExp ::= CId ;
AInt. RExp ::= Integer ;
AStr. RExp ::= String ;
AFlt. RExp ::= Double ;
AMet. RExp ::= "?" ;
terminator RExp "" ;
token CId (('_' | letter) (letter | digit | '\'' | '_')*) ;
```
While a parser and a printer can be generated for many languages
from this grammar by using the BNF Converter, a parser is also
easy to write by hand using recursive descent.
==Syntax of well-formed GFCC code==
Here is a summary of well-formed syntax,
with a comment on the semantics of each construction.
```
Grammar ::=
CId -- abstract syntax names
"(" "concrete" CId* ")" -- concrete syntax names
"(" "flags" Flag* ")" -- global flags
"(" "abstract" Abstract ")" -- abstract syntax
"(" "concrete" Concrete* ")" -- concrete syntaxes
Abstract ::=
"(" "flags" Flag* ")" -- abstract flags
"(" "fun" FunDef* ")" -- function definitions
"(" "cat" CatDef* ")" -- category definitions
Concrete ::=
"(" CId -- language name
"flags" Flag* -- concrete flags
"lin" LinDef* -- linearization rules
"oper" LinDef* -- operations (macros)
"lincat" LinDef* -- linearization type definitions
"lindef" LinDef* -- linearization default definitions
"printname" LinDef* -- printname definitions
"param" LinDef* -- lincats with labels and parameter value names
")"
Flag ::= "(" CId String ")" -- flag and value
FunDef ::= "(" CId Type Exp ")" -- function, type, and definition
CatDef ::= "(" CId Hypo* ")" -- category and context
LinDef ::= "(" CId Term ")" -- function and definition
Type ::=
"(" CId -- value category
"(" "H" Hypo* ")" -- argument context
"(" "X" Exp* ")" ")" -- arguments (of dependent value type)
Exp ::=
"(" CId -- function
"(" "B" CId* ")" -- bindings
"(" "X" Exp* ")" ")" -- arguments
| CId -- variable
| "?" -- metavariable
| "(" "Eq" Equation* ")" -- group of pattern equations
| Integer -- integer literal (non-negative)
| Float -- floating-point literal (non-negative)
| String -- string literal (in double quotes)
Hypo ::= "(" CId Type ")" -- variable and type
Equation ::= "(" "E" Exp Exp* ")" -- value and pattern list
Term ::=
"(" "R" Term* ")" -- array (record or table)
| "(" "S" Term* ")" -- concatenated sequence
| "(" "FV" Term* ")" -- free variant list
| "(" "P" Term Term ")" -- access to index (projection or selection)
| "(" "W" String Term ")" -- token prefix with suffix list
| "(" "A" Integer ")" -- pointer to subtree
| String -- token (in double quotes)
| Integer -- index in array
| CId -- macro constant
| "?" -- metavariable
```
==GFCC interpreter==
The first phase in interpreting GFCC is to parse a GFCC file and
build an internal abstract syntax representation, as specified
in the previous section.
With this representation, linearization can be performed by
a straightforward function from expressions (``Exp``) to terms
(``Term``). All expressions except groups of pattern equations
can be linearized.
Here is a reference Haskell implementation of linearization:
```
linExp :: GFCC -> CId -> Exp -> Term
linExp gfcc lang tree@(DTr _ at trees) = case at of
AC fun -> comp (map lin trees) $ look fun
AS s -> R [K (show s)] -- quoted
AI i -> R [K (show i)]
AF d -> R [K (show d)]
AM -> TM
where
lin = linExp gfcc lang
comp = compute gfcc lang
look = lookLin gfcc lang
```
TODO: bindings must be supported.
Terms resulting from linearization are evaluated in
call-by-value order, with two environments needed:
- the grammar (a concrete syntax) to give the global constants
- an array of terms to give the subtree linearizations
The Haskell implementation works as follows:
```
compute :: GFCC -> CId -> [Term] -> Term -> Term
compute gfcc lang args = comp where
comp trm = case trm of
P r p -> proj (comp r) (comp p)
W s t -> W s (comp t)
R ts -> R $ map comp ts
V i -> idx args (fromInteger i) -- already computed
F c -> comp $ look c -- not computed (if contains V)
FV ts -> FV $ Prelude.map comp ts
S ts -> S $ Prelude.filter (/= S []) $ Prelude.map comp ts
_ -> trm
look = lookOper gfcc lang
idx xs i = xs !! i
proj r p = case (r,p) of
(_, FV ts) -> FV $ Prelude.map (proj r) ts
(FV ts, _ ) -> FV $ Prelude.map (\t -> proj t p) ts
(W s t, _) -> kks (s ++ getString (proj t p))
_ -> comp $ getField r (getIndex p)
getString t = case t of
K (KS s) -> s
_ -> trace ("ERROR in grammar compiler: string from "++ show t) "ERR"
getIndex t = case t of
C i -> fromInteger i
RP p _ -> getIndex p
TM -> 0 -- default value for parameter
_ -> trace ("ERROR in grammar compiler: index from " ++ show t) 0
getField t i = case t of
R rs -> idx rs i
RP _ r -> getField r i
TM -> TM
_ -> trace ("ERROR in grammar compiler: field from " ++ show t) t
```
The result of linearization is usually a record, which is realized as
a string using the following algorithm.
```
realize :: Term -> String
realize trm = case trm of
R (t:_) -> realize t
S ss -> unwords $ map realize ss
K s -> s
W s t -> s ++ realize t
FV (t:_) -> realize t -- TODO: all variants
TM -> "?"
```
Notice that realization always picks the first field of a record.
If a linearization type has more than one field, the first field
does not necessarily contain the desired string.
Also notice that the order of record fields in GFCC is not necessarily
the same as in GF source.

View File

@@ -231,10 +231,10 @@ lookupAbsDef gr m c = errIn ("looking up absdef of" +++ prt c) $ do
_ -> Bad $ prt m +++ "is not an abstract module"
linTypeInt :: Type
linTypeInt =
let ints k = App (Q (IC "Predef") (IC "Ints")) (EInt k) in
RecType [
(LIdent "last",ints 9),(LIdent "s", typeStr), (LIdent "size",ints 1)]
linTypeInt = defLinType
--- let ints k = App (Q (IC "Predef") (IC "Ints")) (EInt k) in
--- RecType [
--- (LIdent "last",ints 9),(LIdent "s", typeStr), (LIdent "size",ints 1)]
lookupLincat :: SourceGrammar -> Ident -> Ident -> Err Type
lookupLincat gr m c | elem c [zIdent "Int"] = return linTypeInt