took back smart type of Int ; Digits type in resource and some adjustments of Det syntax (not yet for romance and russian)

2007-12-17 18:12:46 +00:00
parent 7551c70db6
commit 27602f4f82
6 changed files with 199 additions and 13 deletions
@@ -307,8 +307,8 @@ type ParamEnv =
 paramValues :: SourceGrammar -> ParamEnv
 paramValues cgr = (labels,untyps,typs) where
  partyps = nub $ 
-            [App (Q (IC "Predef") (IC "Ints")) (EInt i) | i <- [1,9]] ---linTypeInt 
-              ++ [ty | 
+            --- [App (Q (IC "Predef") (IC "Ints")) (EInt i) | i <- [1,9]] ---linTypeInt 
+            [ty | 
              (_,(_,CncCat (Yes (RecType ls)) _ _)) <- jments,
              ty0 <- [ty | (_, ty) <- unlockTyp ls],
              ty  <- typsFrom ty0
@@ -31,8 +31,9 @@ linExp mcfg lang tree@(DTr _ at trees) =  ---- bindings TODO
  case at of
    AC fun -> comp (lmap lin trees) $ look fun
    AS s   -> R [kks (show s)] -- quoted
-    AI i   -> R [C lst, kks (show i), C size] where 
-                lst = mod (fromInteger i) 10 ; size = if i < 10 then 0 else 1
+    AI i   -> R [kks (show i)] 
+                --- [C lst, kks (show i), C size] where 
+                --- lst = mod (fromInteger i) 10 ; size = if i < 10 then 0 else 1
    AF d   -> R [kks (show d)]
    AM _   -> TM
 where
@@ -73,7 +73,7 @@ toExp e = case e of
  App fun [App (CId "B") xs, App (CId "X") exps] ->
    DTr [x | AId x <- xs] (AC fun) (lmap toExp exps)
  App (CId "Eq") eqs -> 
-    EEq [Equ (lmap toExp ps) (toExp v) | App (CId "Case") (v:ps) <- eqs]
+    EEq [Equ (lmap toExp ps) (toExp v) | App (CId "E") (v:ps) <- eqs]
  AMet -> DTr [] (AM 0) []
  AInt i -> DTr [] (AI i) []
  AFlt i -> DTr [] (AF i) []
@@ -147,7 +147,7 @@ fromExp e = case e of
  DTr [] (AI i) [] -> AInt (toInteger i)
  DTr [] (AM _) [] -> AMet ----
  EEq eqs -> 
-    App (CId "Eq") [App (CId "Case") (lmap fromExp (v:ps)) | Equ ps v <- eqs]
+    App (CId "Eq") [App (CId "E") (lmap fromExp (v:ps)) | Equ ps v <- eqs]
  _ -> error $ "exp " ++ show e

 fromTerm :: Term -> RExp
@@ -1,6 +1,6 @@
 The GFCC Grammar Format
 Aarne Ranta
-October 5, 2007
+December 14, 2007

 Author's address:
 [``http://www.cs.chalmers.se/~aarne`` http://www.cs.chalmers.se/~aarne]
@@ -8,6 +8,7 @@ Author's address:
 % to compile: txt2tags -thtml --toc gfcc.txt

 History:
+- 14 Dec 2007: simpler, Lisp-like concrete syntax of GFCC
 - 5 Oct 2007: new, better structured GFCC with full expressive power
 - 19 Oct: translation of lincats, new figures on C++
 - 3 Oct 2006: first version
@@ -53,7 +54,8 @@ will be used instead. GFC provides only marginal advantages as a target format
 compared with GF, and it is therefore just extra weight to carry around this
 format.

-The main differences of GFCC compared with GFC (and GF) can be summarized as follows:
+The main differences of GFCC compared with GFC (and GF) can be 
+summarized as follows:
 - there are no modules, and therefore no qualified names
 - a GFCC grammar is multilingual, and consists of a common abstract syntax 
  together with one concrete syntax per language
@@ -66,7 +68,8 @@ The main differences of GFCC compared with GFC (and GF) can be summarized as fol


 Here is an example of a GF grammar, consisting of three modules, 
-as translated to GFCC. The representations are aligned; thus they do not completely
+as translated to GFCC. The representations are aligned; 
+thus they do not completely
 reflect the order of judgements in GFCC files, which have different orders of
 blocks of judgements, and alphabetical sorting.
 ```  
@@ -0,0 +1,182 @@
+GFCC Syntax 
+
+
+==Syntax of GFCC files==
+
+The syntax accepted by the parser is very simple; it is defined by the following BNF grammar:
+```
+  Grm. Grammar ::= [RExp] ;
+
+  App.  RExp ::= "(" CId [RExp] ")" ;
+  AId.  RExp ::= CId ;
+  AInt. RExp ::= Integer ;
+  AStr. RExp ::= String ;
+  AFlt. RExp ::= Double ;
+  AMet. RExp ::= "?" ;
+
+  terminator RExp "" ;
+
+  token CId (('_' | letter) (letter | digit | '\'' | '_')*) ;
+```
+While a parser and a printer can be generated for many languages
+from this grammar by using the BNF Converter, a parser is also
+easy to write by hand using recursive descent.
+
+
+==Syntax of well-formed GFCC code==
+
+Here is a summary of well-formed syntax, 
+with a comment on the semantics of each construction.
+```
+  Grammar ::= 
+    CId                          -- abstract syntax name
+    "(" "concrete" CId* ")"      -- concrete syntax names
+    "(" "flags"    Flag* ")"     -- global flags
+    "(" "abstract" Abstract ")"  -- abstract syntax
+    "(" "concrete" Concrete* ")" -- concrete syntaxes
+
+  Abstract ::= 
+    "(" "flags" Flag*   ")"      -- abstract flags
+    "(" "fun"   FunDef* ")"      -- function definitions
+    "(" "cat"   CatDef* ")"      -- category definitions
+
+  Concrete ::= 
+    "(" CId                      -- language name
+      "flags"     Flag*          -- concrete flags
+      "lin"       LinDef*        -- linearization rules
+      "oper"      LinDef*        -- operations (macros)
+      "lincat"    LinDef*        -- linearization type definitions
+      "lindef"    LinDef*        -- linearization default definitions
+      "printname" LinDef*        -- printname definitions
+      "param"     LinDef*        -- lincats with labels and parameter value names
+    ")" 
+
+  Flag   ::= "(" CId String ")"   -- flag and value
+  FunDef ::= "(" CId Type Exp ")" -- function, type, and definition
+  CatDef ::= "(" CId Hypo* ")"    -- category and context
+  LinDef ::= "(" CId Term ")"     -- function and definition
+
+  Type ::= 
+    "(" CId                 -- value category
+      "(" "H" Hypo* ")"     --   argument context
+      "(" "X" Exp* ")" ")"  --   arguments (of dependent value type)
+
+  Exp ::=
+     "(" CId                -- function
+       "(" "B" CId* ")"     --   bindings
+       "(" "X" Exp* ")" ")" --   arguments
+   | CId                    -- variable
+   | "?"                    -- metavariable
+   | "(" "Eq" Equation* ")" -- group of pattern equations
+   | Integer                -- integer literal (non-negative)
+   | Float                  -- floating-point literal (non-negative)
+   | String                 -- string literal (in double quotes)
+
+  Hypo ::= "(" CId Type ")" -- variable and type
+
+  Equation ::= "(" "E" Exp Exp* ")" -- value and pattern list
+
+  Term ::= 
+     "(" "R"  Term* ")"       -- array (record or table)
+   | "(" "S"  Term* ")"       -- concatenated sequence
+   | "(" "FV" Term* ")"       -- free variant list
+   | "(" "P"  Term Term ")"   -- access to index (projection or selection)
+   | "(" "W"  String Term ")" -- token prefix with suffix list
+   | "(" "A"  Integer ")"     -- pointer to subtree
+   | String                   -- token (in double quotes)
+   | Integer                  -- index in array
+   | CId                      -- macro constant
+   | "?"                      -- metavariable
+```
+
+
+==GFCC interpreter==
+
+The first phase in interpreting GFCC is to parse a GFCC file and
+build an internal abstract syntax representation, as specified
+in the previous section.
+
+With this representation, linearization can be performed by
+a straightforward function from expressions (``Exp``) to terms
+(``Term``). All expressions except groups of pattern equations
+can be linearized.
+
+Here is a reference Haskell implementation of linearization:
+```
+  linExp :: GFCC -> CId -> Exp -> Term  -- linearize a tree in the concrete syntax lang
+  linExp gfcc lang tree@(DTr _ at trees) = case at of
+    AC fun -> comp (map lin trees) $ look fun  -- evaluate what look gives for fun over the subtree results
+    AS s   -> R [kks (show s)]  -- string literal, kept quoted; kks builds the token K (KS ..)
+    AI i   -> R [kks (show i)]  -- integer literal, shown as a one-field record
+    AF d   -> R [kks (show d)]  -- float literal, shown as a one-field record
+    AM _   -> TM                -- metavariable; its argument is ignored
+   where
+     lin  = linExp gfcc lang   -- recursive call on a subtree
+     comp = compute gfcc lang  -- term evaluator, given the subtree linearizations
+     look = lookLin gfcc lang  -- presumably fetches the lin rule of a function; defined elsewhere
+```
+TODO: bindings must be supported.
+
+Terms resulting from linearization are evaluated in
+call-by-value order, with two environments needed:
+- the grammar (a concrete syntax) to give the global constants
+- an array of terms to give the subtree linearizations
+
+
+The Haskell implementation works as follows:
+```
+compute :: GFCC -> CId -> [Term] -> Term -> Term  -- args: the subtree linearizations
+compute gfcc lang args = comp where
+  -- comp evaluates one term; forms not listed below are returned unchanged
+  comp (P t u) = proj (comp t) (comp u)
+  comp (W s t) = W s (comp t)
+  comp (R ts)  = R [comp t | t <- ts]
+  comp (V i)   = idx args (fromInteger i)   -- argument, taken as it is
+  comp (F c)   = comp (look c)              -- operation body, evaluated here
+  comp (FV ts) = FV [comp t | t <- ts]
+  comp (S ts)  = S [u | u <- map comp ts, u /= S []]  -- empty sequences are dropped
+  comp other   = other
+
+  look = lookOper gfcc lang
+
+  idx xs i = xs !! i
+
+  -- proj selects from r by p, distributing over free variants on either side
+  proj r (FV ps)  = FV [proj r p | p <- ps]
+  proj (FV rs) p  = FV [proj r p | r <- rs]
+  proj (W s t) p  = kks (s ++ getString (proj t p))
+  proj r p        = comp (getField r (getIndex p))
+
+  -- getString unwraps a string token; anything else is reported and yields "ERR"
+  getString (K (KS s)) = s
+  getString t = trace ("ERROR in grammar compiler: string from "++ show t) "ERR"
+
+  -- getIndex reads a term as an integer index; unexpected terms are reported and yield 0
+  getIndex (C i)    = fromInteger i
+  getIndex (RP p _) = getIndex p
+  getIndex TM       = 0  -- a metavariable defaults to index 0
+  getIndex t        = trace ("ERROR in grammar compiler: index from " ++ show t) 0
+
+  -- getField picks the i-th component of an array term; unexpected terms are reported
+  getField (R rs)   i = idx rs i
+  getField (RP _ r) i = getField r i
+  getField TM       _ = TM
+  getField t        _ = trace ("ERROR in grammar compiler: field from " ++ show t) t
+```
+The result of linearization is usually a record, which is realized as
+a string using the following algorithm.
+```
+  realize :: Term -> String  -- flatten a linearized term into a string
+  realize trm = case trm of
+    R (t:_)  -> realize t                -- a record is realized by its first field
+    S ss     -> unwords $ map realize ss -- a sequence: parts joined by spaces
+    K (KS s) -> s                        -- a token has the shape K (KS s)
+    W s t    -> s ++ realize t           -- prefix glued to the realized suffix term
+    FV (t:_) -> realize t  -- TODO: all variants
+    TM       -> "?"                      -- metavariable
+```
+Notice that realization always picks the first field of a record.
+If a linearization type has more than one field, the first field
+does not necessarily contain the desired string.
+Also notice that the order of record fields in GFCC is not necessarily
+the same as in GF source.
@@ -231,10 +231,10 @@ lookupAbsDef gr m c = errIn ("looking up absdef of" +++ prt c) $ do
    _ -> Bad $ prt m +++ "is not an abstract module"

 linTypeInt :: Type
-linTypeInt = 
-      let ints k = App (Q (IC "Predef") (IC "Ints")) (EInt k) in
-      RecType [
-        (LIdent "last",ints 9),(LIdent "s", typeStr), (LIdent "size",ints 1)]
+linTypeInt = defLinType
+---      let ints k = App (Q (IC "Predef") (IC "Ints")) (EInt k) in
+---      RecType [
+---        (LIdent "last",ints 9),(LIdent "s", typeStr), (LIdent "size",ints 1)]

 lookupLincat :: SourceGrammar -> Ident -> Ident -> Err Type
 lookupLincat gr m c | elem c [zIdent "Int"] = return linTypeInt