mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-05-20 00:22:51 -06:00
gfcc doc
This commit is contained in:
@@ -34,7 +34,8 @@ October 3, 2006
|
|||||||
<LI><A HREF="#toc13">Running the compiler and the GFCC interpreter</A>
|
<LI><A HREF="#toc13">Running the compiler and the GFCC interpreter</A>
|
||||||
</UL>
|
</UL>
|
||||||
<LI><A HREF="#toc14">The reference interpreter</A>
|
<LI><A HREF="#toc14">The reference interpreter</A>
|
||||||
<LI><A HREF="#toc15">Some things to do</A>
|
<LI><A HREF="#toc15">Interpreter in C++</A>
|
||||||
|
<LI><A HREF="#toc16">Some things to do</A>
|
||||||
</UL>
|
</UL>
|
||||||
|
|
||||||
<P></P>
|
<P></P>
|
||||||
@@ -102,18 +103,18 @@ as translated to GFCC. The representations are aligned, with the exceptions
|
|||||||
due to the alphabetical sorting of GFCC grammars.
|
due to the alphabetical sorting of GFCC grammars.
|
||||||
</P>
|
</P>
|
||||||
<PRE>
|
<PRE>
|
||||||
grammar Ex (Eng Swe);
|
grammar Ex(Eng,Swe);
|
||||||
|
|
||||||
abstract Ex = { abstract {
|
abstract Ex = { abstract {
|
||||||
cat
|
cat
|
||||||
S ; NP ; VP ;
|
S ; NP ; VP ;
|
||||||
fun
|
fun
|
||||||
Pred : NP -> VP -> S ; Pred : NP VP -> S = (Pred);
|
Pred : NP -> VP -> S ; Pred : NP,VP -> S = (Pred);
|
||||||
She, They : NP ; She : -> NP = (She);
|
She, They : NP ; She : -> NP = (She);
|
||||||
Sleep : VP ; Sleep : -> VP = (Sleep);
|
Sleep : VP ; Sleep : -> VP = (Sleep);
|
||||||
They : -> NP = (They);
|
They : -> NP = (They);
|
||||||
} } ;
|
} } ;
|
||||||
;
|
|
||||||
concrete Eng of Ex = { concrete Eng {
|
concrete Eng of Ex = { concrete Eng {
|
||||||
lincat
|
lincat
|
||||||
S = {s : Str} ;
|
S = {s : Str} ;
|
||||||
@@ -122,7 +123,7 @@ due to the alphabetical sorting of GFCC grammars.
|
|||||||
param
|
param
|
||||||
Num = Sg | Pl ;
|
Num = Sg | Pl ;
|
||||||
lin
|
lin
|
||||||
Pred np vp = { Pred = [($0[1], $1[0][$0[0]])] ;
|
Pred np vp = { Pred = [(($0!1),(($1!0)!($0!0)))];
|
||||||
s = np.s ++ vp.s ! np.n} ;
|
s = np.s ++ vp.s ! np.n} ;
|
||||||
She = {s = "she" ; n = Sg} ; She = [0, "she"];
|
She = {s = "she" ; n = Sg} ; She = [0, "she"];
|
||||||
They = {s = "they" ; n = Pl} ;
|
They = {s = "they" ; n = Pl} ;
|
||||||
@@ -141,13 +142,12 @@ due to the alphabetical sorting of GFCC grammars.
|
|||||||
param
|
param
|
||||||
Num = Sg | Pl ;
|
Num = Sg | Pl ;
|
||||||
lin
|
lin
|
||||||
Pred np vp = { Pred = [($0[1], $1[0])];
|
Pred np vp = { Pred = [(($0!0),($1!0))];
|
||||||
s = np.s ++ vp.s} ;
|
s = np.s ++ vp.s} ;
|
||||||
She = {s = "hon"} ; She = ["hon"];
|
She = {s = "hon"} ; She = ["hon"];
|
||||||
They = {s = "de"} ; They = ["de"];
|
They = {s = "de"} ; They = ["de"];
|
||||||
Sleep = {s = "sover"} ; Sleep = ["sover"];
|
Sleep = {s = "sover"} ; Sleep = ["sover"];
|
||||||
} ;
|
} } ;
|
||||||
} ;
|
|
||||||
</PRE>
|
</PRE>
|
||||||
<P></P>
|
<P></P>
|
||||||
<A NAME="toc3"></A>
|
<A NAME="toc3"></A>
|
||||||
@@ -161,9 +161,9 @@ the concrete languages. The abstract syntax and the concrete
|
|||||||
syntaxes themselves follow.
|
syntaxes themselves follow.
|
||||||
</P>
|
</P>
|
||||||
<PRE>
|
<PRE>
|
||||||
Grammar ::= Header ";" Abstract ";" [Concrete] ";" ;
|
Grammar ::= Header ";" Abstract ";" [Concrete] ;
|
||||||
Header ::= "grammar" CId "(" [CId] ")" ;
|
Header ::= "grammar" CId "(" [CId] ")" ;
|
||||||
Abstract ::= "abstract" "{" [AbsDef] "}" ";" ;
|
Abstract ::= "abstract" "{" [AbsDef] "}" ;
|
||||||
Concrete ::= "concrete" CId "{" [CncDef] "}" ;
|
Concrete ::= "concrete" CId "{" [CncDef] "}" ;
|
||||||
</PRE>
|
</PRE>
|
||||||
<P>
|
<P>
|
||||||
@@ -224,24 +224,27 @@ literal.
|
|||||||
<H3>Concrete syntax</H3>
|
<H3>Concrete syntax</H3>
|
||||||
<P>
|
<P>
|
||||||
Linearization terms (<CODE>Term</CODE>) are built as follows.
|
Linearization terms (<CODE>Term</CODE>) are built as follows.
|
||||||
|
Constructor names are shown to make the later code
|
||||||
|
examples readable.
|
||||||
</P>
|
</P>
|
||||||
<PRE>
|
<PRE>
|
||||||
Term ::= "[" [Term] "]" ; -- array
|
R. Term ::= "[" [Term] "]" ; -- array
|
||||||
Term ::= Term "[" Term "]" ; -- access to indexed field
|
P. Term ::= "(" Term "!" Term ")" ; -- access to indexed field
|
||||||
Term ::= "(" [Term] ")" ; -- sequence with ++
|
S. Term ::= "(" [Term] ")" ; -- sequence with ++
|
||||||
Term ::= Tokn ; -- token
|
K. Term ::= Tokn ; -- token
|
||||||
Term ::= "$" Integer ; -- argument subtree
|
V. Term ::= "$" Integer ; -- argument
|
||||||
Term ::= Integer ; -- array index
|
C. Term ::= Integer ; -- array index
|
||||||
Term ::= "[|" [Term] "|]" ; -- free variation
|
FV. Term ::= "[|" [Term] "|]" ; -- free variation
|
||||||
|
TM. Term ::= "?" ; -- linearization of metavariable
|
||||||
</PRE>
|
</PRE>
|
||||||
<P>
|
<P>
|
||||||
Tokens are strings or (maybe obsolescent) prefix-dependent
|
Tokens are strings or (maybe obsolescent) prefix-dependent
|
||||||
variant lists.
|
variant lists.
|
||||||
</P>
|
</P>
|
||||||
<PRE>
|
<PRE>
|
||||||
Tokn ::= String ;
|
KS. Tokn ::= String ;
|
||||||
Tokn ::= "[" "pre" [String] "[" [Variant] "]" "]" ;
|
KP. Tokn ::= "[" "pre" [String] "[" [Variant] "]" "]" ;
|
||||||
Variant ::= [String] "/" [String] ;
|
Var. Variant ::= [String] "/" [String] ;
|
||||||
</PRE>
|
</PRE>
|
||||||
<P>
|
<P>
|
||||||
Three special forms of terms are introduced by the compiler
|
Three special forms of terms are introduced by the compiler
|
||||||
@@ -250,9 +253,9 @@ their presence makes grammars much more compact. Their semantics
|
|||||||
will be explained in a later section.
|
will be explained in a later section.
|
||||||
</P>
|
</P>
|
||||||
<PRE>
|
<PRE>
|
||||||
Term ::= CId ; -- global constant
|
F. Term ::= CId ; -- global constant
|
||||||
Term ::= "(" String "+" Term ")" ; -- prefix + suffix table
|
W. Term ::= "(" String "+" Term ")" ; -- prefix + suffix table
|
||||||
Term ::= "(" Term "@" Term ")"; -- record parameter alias
|
RP. Term ::= "(" Term "@" Term ")"; -- record parameter alias
|
||||||
</PRE>
|
</PRE>
|
||||||
<P>
|
<P>
|
||||||
Identifiers are like <CODE>Ident</CODE> in GF and GFC, except that
|
Identifiers are like <CODE>Ident</CODE> in GF and GFC, except that
|
||||||
@@ -282,7 +285,7 @@ in which linearization is performed.
|
|||||||
AS s -> R [kks (show s)] -- quoted
|
AS s -> R [kks (show s)] -- quoted
|
||||||
AI i -> R [kks (show i)]
|
AI i -> R [kks (show i)]
|
||||||
AF d -> R [kks (show d)]
|
AF d -> R [kks (show d)]
|
||||||
AM -> R [kks "?"] ---- TODO: proper lincat
|
AM -> TM
|
||||||
where
|
where
|
||||||
lin = linExp mcfg lang
|
lin = linExp mcfg lang
|
||||||
comp = compute mcfg lang
|
comp = compute mcfg lang
|
||||||
@@ -301,6 +304,7 @@ a string using the following algorithm.
|
|||||||
K (KP s _) -> unwords s ---- prefix choice TODO
|
K (KP s _) -> unwords s ---- prefix choice TODO
|
||||||
W s t -> s ++ realize t
|
W s t -> s ++ realize t
|
||||||
FV (t:_) -> realize t
|
FV (t:_) -> realize t
|
||||||
|
TM -> "?"
|
||||||
</PRE>
|
</PRE>
|
||||||
<P>
|
<P>
|
||||||
Since the order of record fields is not necessarily
|
Since the order of record fields is not necessarily
|
||||||
@@ -320,39 +324,48 @@ needed:
|
|||||||
</UL>
|
</UL>
|
||||||
|
|
||||||
<P>
|
<P>
|
||||||
The code is cleaned from debugging information present in the working
|
The code is presented in one-level pattern matching, to
|
||||||
version.
|
enable reimplementations in languages that do not permit
|
||||||
|
deep patterns (such as Java and C++).
|
||||||
</P>
|
</P>
|
||||||
<PRE>
|
<PRE>
|
||||||
compute :: GFCC -> CId -> [Term] -> Term -> Term
|
compute :: GFCC -> CId -> [Term] -> Term -> Term
|
||||||
compute mcfg lang args = comp where
|
compute mcfg lang args = comp where
|
||||||
comp trm = case trm of
|
comp trm = case trm of
|
||||||
P r (FV ts) -> FV $ Prelude.map (comp . P r) ts
|
P r p -> proj (comp r) (comp p)
|
||||||
|
|
||||||
P r p -> case (comp r, comp p) of
|
|
||||||
|
|
||||||
-- for the suffix optimization
|
|
||||||
(W s (R ss), p') -> case comp $ idx ss (getIndex p') of
|
|
||||||
K (KS u) -> kks (s ++ u)
|
|
||||||
|
|
||||||
(r', p') -> comp $ (getFields r') !! (getIndex p')
|
|
||||||
|
|
||||||
RP i t -> RP (comp i) (comp t)
|
RP i t -> RP (comp i) (comp t)
|
||||||
W s t -> W s (comp t)
|
W s t -> W s (comp t)
|
||||||
R ts -> R $ Prelude.map comp ts
|
R ts -> R $ Prelude.map comp ts
|
||||||
V i -> args !! (fromInteger i) -- already computed
|
V i -> idx args (fromInteger i) -- already computed
|
||||||
S ts -> S $ Prelude.filter (/= S []) $ Prelude.map comp ts
|
F c -> comp $ look c -- not computed (if contains V)
|
||||||
F c -> comp $ lookLin mcfg lang -- not yet computed
|
|
||||||
FV ts -> FV $ Prelude.map comp ts
|
FV ts -> FV $ Prelude.map comp ts
|
||||||
|
S ts -> S $ Prelude.filter (/= S []) $ Prelude.map comp ts
|
||||||
_ -> trm
|
_ -> trm
|
||||||
|
|
||||||
|
look = lookLin mcfg lang
|
||||||
|
|
||||||
|
idx xs i = xs !! i
|
||||||
|
|
||||||
|
proj r p = case (r,p) of
|
||||||
|
(_, FV ts) -> FV $ Prelude.map (proj r) ts
|
||||||
|
(W s t, _) -> kks (s ++ getString (proj t p))
|
||||||
|
_ -> comp $ getField r (getIndex p)
|
||||||
|
|
||||||
|
getString t = case t of
|
||||||
|
K (KS s) -> s
|
||||||
|
_ -> trace ("ERROR in grammar compiler: string from "++ show t) "ERR"
|
||||||
|
|
||||||
getIndex t = case t of
|
getIndex t = case t of
|
||||||
C i -> fromInteger i
|
C i -> fromInteger i
|
||||||
RP p _ -> getIndex p
|
RP p _ -> getIndex p
|
||||||
|
TM -> 0 -- default value for parameter
|
||||||
|
_ -> trace ("ERROR in grammar compiler: index from " ++ show t) 0
|
||||||
|
|
||||||
getFields t = case t of
|
getField t i = case t of
|
||||||
R rs -> rs
|
R rs -> idx rs i
|
||||||
RP _ r -> getFields r
|
RP _ r -> getField r i
|
||||||
|
TM -> TM
|
||||||
|
_ -> trace ("ERROR in grammar compiler: field from " ++ show t) t
|
||||||
</PRE>
|
</PRE>
|
||||||
<P></P>
|
<P></P>
|
||||||
<A NAME="toc10"></A>
|
<A NAME="toc10"></A>
|
||||||
@@ -451,8 +464,8 @@ we get the encoding
|
|||||||
The GFCC computation rules are essentially
|
The GFCC computation rules are essentially
|
||||||
</P>
|
</P>
|
||||||
<PRE>
|
<PRE>
|
||||||
t [(i @ r)] = t[i]
|
(t ! (i @ _)) = (t ! i)
|
||||||
(i @ r) [j] = r[j]
|
((_ @ r) ! j) = (r ! j)
|
||||||
</PRE>
|
</PRE>
|
||||||
<P></P>
|
<P></P>
|
||||||
<A NAME="toc11"></A>
|
<A NAME="toc11"></A>
|
||||||
@@ -574,11 +587,11 @@ This expression must first be translated to a case expression,
|
|||||||
which can then be translated to the GFCC term
|
which can then be translated to the GFCC term
|
||||||
</P>
|
</P>
|
||||||
<PRE>
|
<PRE>
|
||||||
[2,5][$0[$1]]
|
([2,5] ! ($0 ! $1))
|
||||||
</PRE>
|
</PRE>
|
||||||
<P>
|
<P>
|
||||||
assuming that the variable $np$ is the first argument and that its
|
assuming that the variable <CODE>np</CODE> is the first argument and that its
|
||||||
$Number$ field is the second in the record.
|
<CODE>Number</CODE> field is the second in the record.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
This transformation of course has to be performed recursively, since
|
This transformation of course has to be performed recursively, since
|
||||||
@@ -693,9 +706,71 @@ The available commands are
|
|||||||
</UL>
|
</UL>
|
||||||
|
|
||||||
<A NAME="toc15"></A>
|
<A NAME="toc15"></A>
|
||||||
|
<H2>Interpreter in C++</H2>
|
||||||
|
<P>
|
||||||
|
A base-line interpreter in C++ has been started.
|
||||||
|
Its main functionality is random generation of trees and linearization of them.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
Here are some results from running the different interpreters, compared
|
||||||
|
to running the same grammar in GF, saved in <CODE>.gfcm</CODE> format.
|
||||||
|
The grammar contains the English, German, and Norwegian
|
||||||
|
versions of Bronzeage. The experiment was carried out on
|
||||||
|
an Ubuntu Linux laptop with a 1.5 GHz Intel Centrino processor.
|
||||||
|
</P>
|
||||||
|
<TABLE CELLPADDING="4" BORDER="1">
|
||||||
|
<TR>
|
||||||
|
<TH></TH>
|
||||||
|
<TH>GF</TH>
|
||||||
|
<TH>gfcc(hs)</TH>
|
||||||
|
<TH>gfcc++</TH>
|
||||||
|
</TR>
|
||||||
|
<TR>
|
||||||
|
<TD>program size</TD>
|
||||||
|
<TD ALIGN="center">7249k</TD>
|
||||||
|
<TD ALIGN="center">803k</TD>
|
||||||
|
<TD ALIGN="right">113k</TD>
|
||||||
|
</TR>
|
||||||
|
<TR>
|
||||||
|
<TD>grammar size</TD>
|
||||||
|
<TD ALIGN="center">336k</TD>
|
||||||
|
<TD ALIGN="center">119k</TD>
|
||||||
|
<TD ALIGN="right">119k</TD>
|
||||||
|
</TR>
|
||||||
|
<TR>
|
||||||
|
<TD>read grammar</TD>
|
||||||
|
<TD ALIGN="center">1150ms</TD>
|
||||||
|
<TD ALIGN="center">510ms</TD>
|
||||||
|
<TD ALIGN="right">150ms</TD>
|
||||||
|
</TR>
|
||||||
|
<TR>
|
||||||
|
<TD>generate 222</TD>
|
||||||
|
<TD ALIGN="center">9500ms</TD>
|
||||||
|
<TD ALIGN="center">450ms</TD>
|
||||||
|
<TD ALIGN="right">800ms</TD>
|
||||||
|
</TR>
|
||||||
|
<TR>
|
||||||
|
<TD>memory</TD>
|
||||||
|
<TD ALIGN="center">21M</TD>
|
||||||
|
<TD ALIGN="center">10M</TD>
|
||||||
|
<TD ALIGN="right">2M</TD>
|
||||||
|
</TR>
|
||||||
|
</TABLE>
|
||||||
|
|
||||||
|
<P></P>
|
||||||
|
<P>
|
||||||
|
To summarize:
|
||||||
|
</P>
|
||||||
|
<UL>
|
||||||
|
<LI>going from GF to gfcc is a major win in both code size and efficiency
|
||||||
|
<LI>going from the Haskell to the C++ interpreter is a win in code size and memory,
|
||||||
|
but not so much in speed
|
||||||
|
</UL>
|
||||||
|
|
||||||
|
<A NAME="toc16"></A>
|
||||||
<H2>Some things to do</H2>
|
<H2>Some things to do</H2>
|
||||||
<P>
|
<P>
|
||||||
Interpreters in Java and C++.
|
Interpreter in Java.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
Parsing via MCFG
|
Parsing via MCFG
|
||||||
@@ -706,7 +781,11 @@ Parsing via MCFG
|
|||||||
</UL>
|
</UL>
|
||||||
|
|
||||||
<P>
|
<P>
|
||||||
File compression of GFCC output.
|
Hand-written parsers for GFCC grammars to reduce code size
|
||||||
|
(and improve efficiency?) of interpreters.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
Binary format and/or file compression of GFCC output.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
Syntax editor based on GFCC.
|
Syntax editor based on GFCC.
|
||||||
|
|||||||
@@ -55,18 +55,18 @@ Here is an example of a GF grammar, consisting of three modules,
|
|||||||
as translated to GFCC. The representations are aligned, with the exceptions
|
as translated to GFCC. The representations are aligned, with the exceptions
|
||||||
due to the alphabetical sorting of GFCC grammars.
|
due to the alphabetical sorting of GFCC grammars.
|
||||||
```
|
```
|
||||||
grammar Ex (Eng Swe);
|
grammar Ex(Eng,Swe);
|
||||||
|
|
||||||
abstract Ex = { abstract {
|
abstract Ex = { abstract {
|
||||||
cat
|
cat
|
||||||
S ; NP ; VP ;
|
S ; NP ; VP ;
|
||||||
fun
|
fun
|
||||||
Pred : NP -> VP -> S ; Pred : NP VP -> S = (Pred);
|
Pred : NP -> VP -> S ; Pred : NP,VP -> S = (Pred);
|
||||||
She, They : NP ; She : -> NP = (She);
|
She, They : NP ; She : -> NP = (She);
|
||||||
Sleep : VP ; Sleep : -> VP = (Sleep);
|
Sleep : VP ; Sleep : -> VP = (Sleep);
|
||||||
They : -> NP = (They);
|
They : -> NP = (They);
|
||||||
} } ;
|
} } ;
|
||||||
;
|
|
||||||
concrete Eng of Ex = { concrete Eng {
|
concrete Eng of Ex = { concrete Eng {
|
||||||
lincat
|
lincat
|
||||||
S = {s : Str} ;
|
S = {s : Str} ;
|
||||||
@@ -75,7 +75,7 @@ concrete Eng of Ex = { concrete Eng {
|
|||||||
param
|
param
|
||||||
Num = Sg | Pl ;
|
Num = Sg | Pl ;
|
||||||
lin
|
lin
|
||||||
Pred np vp = { Pred = [($0[1], $1[0][$0[0]])] ;
|
Pred np vp = { Pred = [(($0!1),(($1!0)!($0!0)))];
|
||||||
s = np.s ++ vp.s ! np.n} ;
|
s = np.s ++ vp.s ! np.n} ;
|
||||||
She = {s = "she" ; n = Sg} ; She = [0, "she"];
|
She = {s = "she" ; n = Sg} ; She = [0, "she"];
|
||||||
They = {s = "they" ; n = Pl} ;
|
They = {s = "they" ; n = Pl} ;
|
||||||
@@ -94,13 +94,12 @@ concrete Swe of Ex = { concrete Swe {
|
|||||||
param
|
param
|
||||||
Num = Sg | Pl ;
|
Num = Sg | Pl ;
|
||||||
lin
|
lin
|
||||||
Pred np vp = { Pred = [($0[1], $1[0])];
|
Pred np vp = { Pred = [(($0!0),($1!0))];
|
||||||
s = np.s ++ vp.s} ;
|
s = np.s ++ vp.s} ;
|
||||||
She = {s = "hon"} ; She = ["hon"];
|
She = {s = "hon"} ; She = ["hon"];
|
||||||
They = {s = "de"} ; They = ["de"];
|
They = {s = "de"} ; They = ["de"];
|
||||||
Sleep = {s = "sover"} ; Sleep = ["sover"];
|
Sleep = {s = "sover"} ; Sleep = ["sover"];
|
||||||
} ;
|
} } ;
|
||||||
} ;
|
|
||||||
```
|
```
|
||||||
|
|
||||||
==The syntax of GFCC files==
|
==The syntax of GFCC files==
|
||||||
@@ -112,9 +111,9 @@ A grammar has a header telling the name of the abstract syntax
|
|||||||
the concrete languages. The abstract syntax and the concrete
|
the concrete languages. The abstract syntax and the concrete
|
||||||
syntaxes themselves follow.
|
syntaxes themselves follow.
|
||||||
```
|
```
|
||||||
Grammar ::= Header ";" Abstract ";" [Concrete] ";" ;
|
Grammar ::= Header ";" Abstract ";" [Concrete] ;
|
||||||
Header ::= "grammar" CId "(" [CId] ")" ;
|
Header ::= "grammar" CId "(" [CId] ")" ;
|
||||||
Abstract ::= "abstract" "{" [AbsDef] "}" ";" ;
|
Abstract ::= "abstract" "{" [AbsDef] "}" ;
|
||||||
Concrete ::= "concrete" CId "{" [CncDef] "}" ;
|
Concrete ::= "concrete" CId "{" [CncDef] "}" ;
|
||||||
```
|
```
|
||||||
Abstract syntax judgements give typings and semantic definitions.
|
Abstract syntax judgements give typings and semantic definitions.
|
||||||
@@ -168,30 +167,33 @@ literal.
|
|||||||
===Concrete syntax===
|
===Concrete syntax===
|
||||||
|
|
||||||
Linearization terms (``Term``) are built as follows.
|
Linearization terms (``Term``) are built as follows.
|
||||||
|
Constructor names are shown to make the later code
|
||||||
|
examples readable.
|
||||||
```
|
```
|
||||||
Term ::= "[" [Term] "]" ; -- array
|
R. Term ::= "[" [Term] "]" ; -- array
|
||||||
Term ::= Term "[" Term "]" ; -- access to indexed field
|
P. Term ::= "(" Term "!" Term ")" ; -- access to indexed field
|
||||||
Term ::= "(" [Term] ")" ; -- sequence with ++
|
S. Term ::= "(" [Term] ")" ; -- sequence with ++
|
||||||
Term ::= Tokn ; -- token
|
K. Term ::= Tokn ; -- token
|
||||||
Term ::= "$" Integer ; -- argument subtree
|
V. Term ::= "$" Integer ; -- argument
|
||||||
Term ::= Integer ; -- array index
|
C. Term ::= Integer ; -- array index
|
||||||
Term ::= "[|" [Term] "|]" ; -- free variation
|
FV. Term ::= "[|" [Term] "|]" ; -- free variation
|
||||||
|
TM. Term ::= "?" ; -- linearization of metavariable
|
||||||
```
|
```
|
||||||
Tokens are strings or (maybe obsolescent) prefix-dependent
|
Tokens are strings or (maybe obsolescent) prefix-dependent
|
||||||
variant lists.
|
variant lists.
|
||||||
```
|
```
|
||||||
Tokn ::= String ;
|
KS. Tokn ::= String ;
|
||||||
Tokn ::= "[" "pre" [String] "[" [Variant] "]" "]" ;
|
KP. Tokn ::= "[" "pre" [String] "[" [Variant] "]" "]" ;
|
||||||
Variant ::= [String] "/" [String] ;
|
Var. Variant ::= [String] "/" [String] ;
|
||||||
```
|
```
|
||||||
Three special forms of terms are introduced by the compiler
|
Three special forms of terms are introduced by the compiler
|
||||||
as optimizations. They can in principle be eliminated, but
|
as optimizations. They can in principle be eliminated, but
|
||||||
their presence makes grammars much more compact. Their semantics
|
their presence makes grammars much more compact. Their semantics
|
||||||
will be explained in a later section.
|
will be explained in a later section.
|
||||||
```
|
```
|
||||||
Term ::= CId ; -- global constant
|
F. Term ::= CId ; -- global constant
|
||||||
Term ::= "(" String "+" Term ")" ; -- prefix + suffix table
|
W. Term ::= "(" String "+" Term ")" ; -- prefix + suffix table
|
||||||
Term ::= "(" Term "@" Term ")"; -- record parameter alias
|
RP. Term ::= "(" Term "@" Term ")"; -- record parameter alias
|
||||||
```
|
```
|
||||||
Identifiers are like ``Ident`` in GF and GFC, except that
|
Identifiers are like ``Ident`` in GF and GFC, except that
|
||||||
the compiler produces constants prefixed with ``_`` in
|
the compiler produces constants prefixed with ``_`` in
|
||||||
@@ -218,7 +220,7 @@ in which linearization is performed.
|
|||||||
AS s -> R [kks (show s)] -- quoted
|
AS s -> R [kks (show s)] -- quoted
|
||||||
AI i -> R [kks (show i)]
|
AI i -> R [kks (show i)]
|
||||||
AF d -> R [kks (show d)]
|
AF d -> R [kks (show d)]
|
||||||
AM -> R [kks "?"] ---- TODO: proper lincat
|
AM -> TM
|
||||||
where
|
where
|
||||||
lin = linExp mcfg lang
|
lin = linExp mcfg lang
|
||||||
comp = compute mcfg lang
|
comp = compute mcfg lang
|
||||||
@@ -235,6 +237,7 @@ a string using the following algorithm.
|
|||||||
K (KP s _) -> unwords s ---- prefix choice TODO
|
K (KP s _) -> unwords s ---- prefix choice TODO
|
||||||
W s t -> s ++ realize t
|
W s t -> s ++ realize t
|
||||||
FV (t:_) -> realize t
|
FV (t:_) -> realize t
|
||||||
|
TM -> "?"
|
||||||
```
|
```
|
||||||
Since the order of record fields is not necessarily
|
Since the order of record fields is not necessarily
|
||||||
the same as in GF source,
|
the same as in GF source,
|
||||||
@@ -250,38 +253,47 @@ needed:
|
|||||||
- an array of terms to give the subtree linearizations
|
- an array of terms to give the subtree linearizations
|
||||||
|
|
||||||
|
|
||||||
The code is cleaned from debugging information present in the working
|
The code is presented in one-level pattern matching, to
|
||||||
version.
|
enable reimplementations in languages that do not permit
|
||||||
|
deep patterns (such as Java and C++).
|
||||||
```
|
```
|
||||||
compute :: GFCC -> CId -> [Term] -> Term -> Term
|
compute :: GFCC -> CId -> [Term] -> Term -> Term
|
||||||
compute mcfg lang args = comp where
|
compute mcfg lang args = comp where
|
||||||
comp trm = case trm of
|
comp trm = case trm of
|
||||||
P r (FV ts) -> FV $ Prelude.map (comp . P r) ts
|
P r p -> proj (comp r) (comp p)
|
||||||
|
|
||||||
P r p -> case (comp r, comp p) of
|
|
||||||
|
|
||||||
-- for the suffix optimization
|
|
||||||
(W s (R ss), p') -> case comp $ idx ss (getIndex p') of
|
|
||||||
K (KS u) -> kks (s ++ u)
|
|
||||||
|
|
||||||
(r', p') -> comp $ (getFields r') !! (getIndex p')
|
|
||||||
|
|
||||||
RP i t -> RP (comp i) (comp t)
|
RP i t -> RP (comp i) (comp t)
|
||||||
W s t -> W s (comp t)
|
W s t -> W s (comp t)
|
||||||
R ts -> R $ Prelude.map comp ts
|
R ts -> R $ Prelude.map comp ts
|
||||||
V i -> args !! (fromInteger i) -- already computed
|
V i -> idx args (fromInteger i) -- already computed
|
||||||
S ts -> S $ Prelude.filter (/= S []) $ Prelude.map comp ts
|
F c -> comp $ look c -- not computed (if contains V)
|
||||||
F c -> comp $ lookLin mcfg lang -- not yet computed
|
|
||||||
FV ts -> FV $ Prelude.map comp ts
|
FV ts -> FV $ Prelude.map comp ts
|
||||||
|
S ts -> S $ Prelude.filter (/= S []) $ Prelude.map comp ts
|
||||||
_ -> trm
|
_ -> trm
|
||||||
|
|
||||||
|
look = lookLin mcfg lang
|
||||||
|
|
||||||
|
idx xs i = xs !! i
|
||||||
|
|
||||||
|
proj r p = case (r,p) of
|
||||||
|
(_, FV ts) -> FV $ Prelude.map (proj r) ts
|
||||||
|
(W s t, _) -> kks (s ++ getString (proj t p))
|
||||||
|
_ -> comp $ getField r (getIndex p)
|
||||||
|
|
||||||
|
getString t = case t of
|
||||||
|
K (KS s) -> s
|
||||||
|
_ -> trace ("ERROR in grammar compiler: string from "++ show t) "ERR"
|
||||||
|
|
||||||
getIndex t = case t of
|
getIndex t = case t of
|
||||||
C i -> fromInteger i
|
C i -> fromInteger i
|
||||||
RP p _ -> getIndex p
|
RP p _ -> getIndex p
|
||||||
|
TM -> 0 -- default value for parameter
|
||||||
|
_ -> trace ("ERROR in grammar compiler: index from " ++ show t) 0
|
||||||
|
|
||||||
getFields t = case t of
|
getField t i = case t of
|
||||||
R rs -> rs
|
R rs -> idx rs i
|
||||||
RP _ r -> getFields r
|
RP _ r -> getField r i
|
||||||
|
TM -> TM
|
||||||
|
_ -> trace ("ERROR in grammar compiler: field from " ++ show t) t
|
||||||
```
|
```
|
||||||
|
|
||||||
===The special term constructors===
|
===The special term constructors===
|
||||||
@@ -353,8 +365,8 @@ we get the encoding
|
|||||||
```
|
```
|
||||||
The GFCC computation rules are essentially
|
The GFCC computation rules are essentially
|
||||||
```
|
```
|
||||||
t [(i @ r)] = t[i]
|
(t ! (i @ _)) = (t ! i)
|
||||||
(i @ r) [j] = r[j]
|
((_ @ r) ! j) = (r ! j)
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
@@ -456,10 +468,10 @@ This expression must first be translated to a case expression,
|
|||||||
```
|
```
|
||||||
which can then be translated to the GFCC term
|
which can then be translated to the GFCC term
|
||||||
```
|
```
|
||||||
[2,5][$0[$1]]
|
([2,5] ! ($0 ! $1))
|
||||||
```
|
```
|
||||||
assuming that the variable $np$ is the first argument and that its
|
assuming that the variable ``np`` is the first argument and that its
|
||||||
$Number$ field is the second in the record.
|
``Number`` field is the second in the record.
|
||||||
|
|
||||||
This transformation of course has to be performed recursively, since
|
This transformation of course has to be performed recursively, since
|
||||||
there can be several run-time variables in a parameter value:
|
there can be several run-time variables in a parameter value:
|
||||||
@@ -558,16 +570,46 @@ The available commands are
|
|||||||
- ``quit``: terminate the system cleanly
|
- ``quit``: terminate the system cleanly
|
||||||
|
|
||||||
|
|
||||||
|
==Interpreter in C++==
|
||||||
|
|
||||||
|
A base-line interpreter in C++ has been started.
|
||||||
|
Its main functionality is random generation of trees and linearization of them.
|
||||||
|
|
||||||
|
Here are some results from running the different interpreters, compared
|
||||||
|
to running the same grammar in GF, saved in ``.gfcm`` format.
|
||||||
|
The grammar contains the English, German, and Norwegian
|
||||||
|
versions of Bronzeage. The experiment was carried out on
|
||||||
|
an Ubuntu Linux laptop with a 1.5 GHz Intel Centrino processor.
|
||||||
|
|
||||||
|
|| | GF | gfcc(hs) | gfcc++ |
|
||||||
|
| program size | 7249k | 803k | 113k
|
||||||
|
| grammar size | 336k | 119k | 119k
|
||||||
|
| read grammar | 1150ms | 510ms | 150ms
|
||||||
|
| generate 222 | 9500ms | 450ms | 800ms
|
||||||
|
| memory | 21M | 10M | 2M
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
To summarize:
|
||||||
|
- going from GF to gfcc is a major win in both code size and efficiency
|
||||||
|
- going from the Haskell to the C++ interpreter is a win in code size and memory,
|
||||||
|
but not so much in speed
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
==Some things to do==
|
==Some things to do==
|
||||||
|
|
||||||
Interpreters in Java and C++.
|
Interpreter in Java.
|
||||||
|
|
||||||
Parsing via MCFG
|
Parsing via MCFG
|
||||||
- the FCFG format can possibly be simplified
|
- the FCFG format can possibly be simplified
|
||||||
- parser grammars should be saved in files to make interpreters easier
|
- parser grammars should be saved in files to make interpreters easier
|
||||||
|
|
||||||
|
|
||||||
File compression of GFCC output.
|
Hand-written parsers for GFCC grammars to reduce code size
|
||||||
|
(and improve efficiency?) of interpreters.
|
||||||
|
|
||||||
|
Binary format and/or file compression of GFCC output.
|
||||||
|
|
||||||
Syntax editor based on GFCC.
|
Syntax editor based on GFCC.
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user