moved parts of doc to deprecated/doc
259
doc/2341.html
@@ -1,259 +0,0 @@
|
||||
<html>
|
||||
<HEAD><META http-equiv=Content-Type content="text/html; charset=utf-8"></HEAD>
|
||||
<body>
|
||||
af_tunni : lámma kún síddi? boqól afartón i ków
|
||||
|
||||
<p>
|
||||
albanian : dy mijë tre qind e dyzet e një
|
||||
|
||||
<p>
|
||||
amharic : ሁለት ሺህ ሦስት መቶ ኣርባ ኣንድ
|
||||
|
||||
<p>
|
||||
arabic_classical : الفان و ثلاث مائة و واحد و أربعون
|
||||
|
||||
<p>
|
||||
arabic_modern : ﺍﻟﻔﻴﻦ ﻭ ﺛﻼﺛﻤﺎﺋﺔ ﻭ ﻭﺍﺣﺪ ﻭ ﺃﺭﺑﻌﻴﻦ
|
||||
|
||||
<p>
|
||||
basque : bi mila ta hirurehun berrogei ta bat
|
||||
|
||||
<p>
|
||||
bearlake_slave : nákee lamíl tai lak'o, óno, di,i, honéno, ?ó, l-ée
|
||||
|
||||
<p>
|
||||
bulgarian : две жиляди триста четирисет и едно
|
||||
|
||||
<p>
|
||||
catalan : dos mil tres-cents quaranta - u
|
||||
|
||||
<p>
|
||||
chinese : è´° ä» é¶ å ä½° è æ¾ 壹
|
||||
|
||||
<p>
|
||||
croatian : dva hiljade tri stotine četrdeset i jedan
|
||||
|
||||
<p>
|
||||
czech : dva tisíce tr^i sta čtyr^icet jeden
|
||||
|
||||
<p>
|
||||
dagur : hoire miange guarebe jau duci neke
|
||||
|
||||
<p>
|
||||
danish : to tusind og tre hundrede og en og fyrre
|
||||
|
||||
<p>
|
||||
decimal : 2341
|
||||
|
||||
<p>
|
||||
dutch : twee duizend drie honderd een en veertig
|
||||
|
||||
<p>
|
||||
english : two thousand three hundred and forty - one
|
||||
|
||||
<p>
|
||||
finnish : kaksi tuhatta kolme sataa neljä kymmentä yksi
|
||||
|
||||
<p>
|
||||
french : deux mille trois cent quarante et un
|
||||
|
||||
<p>
|
||||
french_swiss : deux mille trois cent quarante et un
|
||||
|
||||
<p>
|
||||
fulfulde : ujine d.id.i temed.d.e tati e chappand.e nai e go'o
|
||||
|
||||
<p>
|
||||
geez : ዕሽራ ወ ሠላስቱ ምእት አርብዓ ወ አሐዱ
|
||||
|
||||
<p>
|
||||
german : zwei tausend drei hundert ein und vierzig
|
||||
|
||||
<p>
|
||||
greek_classical : δισχίλιοι τριακόσιοι τετταράκοντα εἵς
|
||||
|
||||
<p>
|
||||
greek_modern : δύο χιλιάδες τριακόσια σαράντα ένα
|
||||
|
||||
<p>
|
||||
guahibo : aniha sunu akueya sia yana bae kae
|
||||
|
||||
<p>
|
||||
guarani : moko~i ma mpohapy sa~ irundy kua~ petei~
|
||||
|
||||
<p>
|
||||
hebrew_biblical : אלפים ו שלש מאות ו ארבעים ו אחד
|
||||
|
||||
<p>
|
||||
hindi : दो हज़ार तीन सौ एक्तालीस
|
||||
|
||||
<p>
|
||||
hungarian : két ezer három száz negyven egy
|
||||
|
||||
<p>
|
||||
icelandic : tvö Þúsund Þrjú hundrað fjörutíu og einn
|
||||
|
||||
<p>
|
||||
irish : dhá mhíle trí chead dhá fhichead a haon
|
||||
|
||||
<p>
|
||||
italian : due mila tre cento quaranta uno
|
||||
|
||||
<p>
|
||||
japanese : にせん さんびゃく よんぢゅう いち
|
||||
|
||||
<p>
|
||||
kabardian : m&yn&yt' s'a&ys' p'L-'&s'ra z&ra
|
||||
|
||||
<p>
|
||||
kambera : dua riu tailu ngahu patu kambulu hau
|
||||
|
||||
<p>
|
||||
kawaiisu : N
|
||||
<p>
|
||||
khmer : bīra bā'na pī raya sē sipa mwya
|
||||
|
||||
<p>
|
||||
khowar : joo hazâr troi shọr oché joo bîsher î
|
||||
|
||||
<p>
|
||||
kodagu : i:ra:yrat mu:nu:yt.a na:padï
|
||||
|
||||
<p>
|
||||
kolyma_yukaghir : N
|
||||
<p>
|
||||
kulung : ni habau su chhum lik i
|
||||
|
||||
<p>
|
||||
kwami : dùbúk póllów dálmágí kúnún kán kúu pòD^òw kán múndí
|
||||
|
||||
<p>
|
||||
kwaza : N
|
||||
<p>
|
||||
lalo : `n. t'w sa há i tjhí tjh`&
|
||||
|
||||
<p>
|
||||
lamani : di hajaar do se caaLise par ek
|
||||
|
||||
<p>
|
||||
latvian : divtu^kstoš trīssimt četrdesmit viens
|
||||
|
||||
<p>
|
||||
lithuanian : dù tú:kstanc^iu, try:s s^imtai~ ke:turiasdes^imt víenas
|
||||
|
||||
<p>
|
||||
lotuxo : tausand ârrexai ikO EssIxa xunixoi ikO atOmwana aNwan x' âbotye
|
||||
|
||||
<p>
|
||||
maale : lam?ó $íya haitsó s'ééta ?oydí-támmi pétte
|
||||
|
||||
<p>
|
||||
malay : dua ribu tiga ratus empat puluh satu
|
||||
|
||||
<p>
|
||||
maltese : elfejn tliet mija u wieh-ed u erbgh-in
|
||||
|
||||
<p>
|
||||
mapuche : epu warangka külá pataka meli mari kiñe
|
||||
|
||||
<p>
|
||||
margi : dúbú s`&d>àN ghàrú mák`&r agá fód>ú kùmì gà s'&r pátlú*
|
||||
|
||||
<p>
|
||||
maybrat : N
|
||||
<p>
|
||||
miya : d'&bu ts`&r '`&náa d>àriy kìdi '`&náa díb>i f`&d>& bèh&n wut'&
|
||||
|
||||
<p>
|
||||
mongolian : qoyar mingGan Gurban ĵa'un döčin nigän
|
||||
|
||||
<p>
|
||||
nenets : side juonar n-ahar jur t-êt ju' ~ob
|
||||
|
||||
<p>
|
||||
norwegian_book : to tusen og tre hundre og førti et
|
||||
|
||||
<p>
|
||||
old_church_slavonic : дъвѣ тысѭшти триѥ съта четыре десѧте и ѥдинъ
|
||||
|
||||
<p>
|
||||
oromo : kuma lama fi dhibba sadii fi afurtamii tokko
|
||||
|
||||
<p>
|
||||
pashto : دوه زره دري سوه او يو څلوۍښت
|
||||
|
||||
<p>
|
||||
polish : dwa tysiace trzysta czterdziesci jeden
|
||||
|
||||
<p>
|
||||
portuguese : dois mil trezentos quarenta e um
|
||||
|
||||
<p>
|
||||
quechua : iskay warank'a kinsa pachak tawa chunka jukniyuq
|
||||
|
||||
<p>
|
||||
romanian : două mii trei sute patruzeci şi unu
|
||||
|
||||
<p>
|
||||
russian : две тысячи триста сорок один
|
||||
|
||||
<p>
|
||||
sango : ngbangbu bale óse na ndó ní ngbangbu otá na ndó ní bale osió na ndó ní ÓkO
|
||||
|
||||
<p>
|
||||
sanskrit : त्रि शतान्य एकचत्वारिंशच च द्वे सहस्रे
|
||||
|
||||
<p>
|
||||
slovak : dva tisic tri sto styridsat jedna
|
||||
|
||||
<p>
|
||||
sorani : دۇ ههزار سىسهد ځل و يهك
|
||||
|
||||
<p>
|
||||
spanish : dos mil trescientos cuarenta y uno
|
||||
|
||||
<p>
|
||||
stieng : baar ban pê riêng puôn jo't muôi
|
||||
|
||||
<p>
|
||||
swahili : elfu mbili mia tatu arobaini na moja
|
||||
|
||||
<p>
|
||||
swedish : två tusen tre hundra fyrtio ett
|
||||
|
||||
<p>
|
||||
tamil : இரணௌடௌ ஆயாரதௌதீ மீனௌ நரீ நரௌ பதௌ ஓனௌரீ
|
||||
|
||||
<p>
|
||||
tampere : kaks tuhatta kolme sataa nel kyt yks
|
||||
|
||||
<p>
|
||||
tibetan : t̆ong ṭ'a' n̆yī d́ang sumğya d́ang z̆hyib chu źhye chi'
|
||||
|
||||
<p>
|
||||
totonac : maa t~u3 mil lii ~a tuhun pus^um tun
|
||||
|
||||
<p>
|
||||
tuda_daza : dubu cu sao kidra ago.zo. sao mOrta tozo sao tro
|
||||
|
||||
<p>
|
||||
tukang_besi : dua riwu tolu hatu hato hulu sa'asa
|
||||
|
||||
<p>
|
||||
turkish : iki bin üç yüz kırk bir
|
||||
|
||||
<p>
|
||||
votic : kahsi tuhatta keVmsata: nelläts^ümmet ühsi
|
||||
|
||||
<p>
|
||||
welsh : dau fil tri chan un a deugain
|
||||
|
||||
<p>
|
||||
yasin_burushaski : altó hazár iskí tha altó-áltar hek
|
||||
|
||||
<p>
|
||||
zaiwa : i55 hing55 sum11 syo31 mi11 cue31 ra11
|
||||
|
||||
</body>
|
||||
</html>
|
||||
|
||||
BIN
doc/DocGF.pdf
569
doc/DocGF.tex
@@ -1,569 +0,0 @@
|
||||
\batchmode
|
||||
%This Latex file is machine-generated by the BNF-converter
|
||||
|
||||
\documentclass[a4paper,11pt]{article}
|
||||
\author{BNF-converter}
|
||||
\title{The Language GF}
|
||||
\setlength{\parindent}{0mm}
|
||||
\setlength{\parskip}{1mm}
|
||||
\begin{document}
|
||||
|
||||
\maketitle
|
||||
|
||||
\newcommand{\emptyP}{\mbox{$\epsilon$}}
|
||||
\newcommand{\terminal}[1]{\mbox{{\texttt {#1}}}}
|
||||
\newcommand{\nonterminal}[1]{\mbox{$\langle \mbox{{\sl #1 }} \! \rangle$}}
|
||||
\newcommand{\arrow}{\mbox{::=}}
|
||||
\newcommand{\delimit}{\mbox{$|$}}
|
||||
\newcommand{\reserved}[1]{\mbox{{\texttt {#1}}}}
|
||||
\newcommand{\literal}[1]{\mbox{{\texttt {#1}}}}
|
||||
\newcommand{\symb}[1]{\mbox{{\texttt {#1}}}}
|
||||
|
||||
This document was automatically generated by the {\em BNF-Converter}. It was generated together with the lexer, the parser, and the abstract syntax module, which guarantees that the document matches with the implementation of the language (provided no hand-hacking has taken place).
|
||||
|
||||
\section*{The lexical structure of GF}
|
||||
\subsection*{Identifiers}
|
||||
Identifiers \nonterminal{Ident} are unquoted strings beginning with a letter,
|
||||
followed by any combination of letters, digits, and the characters {\tt \_ '},
|
||||
reserved words excluded.
|
||||
|
||||
|
||||
\subsection*{Literals}
|
||||
Integer literals \nonterminal{Int}\ are nonempty sequences of digits.
|
||||
|
||||
|
||||
String literals \nonterminal{String}\ have the form
|
||||
\terminal{"}$x$\terminal{"}, where $x$ is any sequence of any characters
|
||||
except \terminal{"}\ unless preceded by \verb6\6.
|
||||
|
||||
|
||||
|
||||
|
||||
LString literals are recognized by the regular expression
|
||||
\(\mbox{`''} ({\nonterminal{anychar}} - \mbox{`''})* \mbox{`''}\)
|
||||
|
||||
|
||||
\subsection*{Reserved words and symbols}
|
||||
The set of reserved words is the set of terminals appearing in the grammar. Those reserved words that consist of non-letter characters are called symbols, and they are treated in a different way from those that are similar to identifiers. The lexer follows rules familiar from languages like Haskell, C, and Java, including longest match and spacing conventions.
|
||||
|
||||
The reserved words used in GF are the following: \\
|
||||
|
||||
\begin{tabular}{lll}
|
||||
{\reserved{Lin}} &{\reserved{PType}} &{\reserved{Str}} \\
|
||||
{\reserved{Strs}} &{\reserved{Tok}} &{\reserved{Type}} \\
|
||||
{\reserved{abstract}} &{\reserved{case}} &{\reserved{cat}} \\
|
||||
{\reserved{concrete}} &{\reserved{data}} &{\reserved{def}} \\
|
||||
{\reserved{flags}} &{\reserved{fn}} &{\reserved{fun}} \\
|
||||
{\reserved{grammar}} &{\reserved{in}} &{\reserved{include}} \\
|
||||
{\reserved{incomplete}} &{\reserved{instance}} &{\reserved{interface}} \\
|
||||
{\reserved{let}} &{\reserved{lin}} &{\reserved{lincat}} \\
|
||||
{\reserved{lindef}} &{\reserved{lintype}} &{\reserved{of}} \\
|
||||
{\reserved{open}} &{\reserved{oper}} &{\reserved{out}} \\
|
||||
{\reserved{package}} &{\reserved{param}} &{\reserved{pattern}} \\
|
||||
{\reserved{pre}} &{\reserved{printname}} &{\reserved{resource}} \\
|
||||
{\reserved{reuse}} &{\reserved{strs}} &{\reserved{table}} \\
|
||||
{\reserved{tokenizer}} &{\reserved{transfer}} &{\reserved{union}} \\
|
||||
{\reserved{var}} &{\reserved{variants}} &{\reserved{where}} \\
|
||||
{\reserved{with}} & & \\
|
||||
\end{tabular}\\
|
||||
|
||||
The symbols used in GF are the following: \\
|
||||
|
||||
\begin{tabular}{lll}
|
||||
{\symb{;}} &{\symb{{$=$}}} &{\symb{\{}} \\
|
||||
{\symb{\}}} &{\symb{(}} &{\symb{)}} \\
|
||||
{\symb{:}} &{\symb{{$-$}{$>$}}} &{\symb{**}} \\
|
||||
{\symb{,}} &{\symb{[}} &{\symb{]}} \\
|
||||
{\symb{.}} &{\symb{{$|$}}} &{\symb{\%}} \\
|
||||
{\symb{?}} &{\symb{{$<$}}} &{\symb{{$>$}}} \\
|
||||
{\symb{@}} &{\symb{!}} &{\symb{*}} \\
|
||||
{\symb{$\backslash$}} &{\symb{{$=$}{$>$}}} &{\symb{{$+$}{$+$}}} \\
|
||||
{\symb{{$+$}}} &{\symb{\_}} &{\symb{\$}} \\
|
||||
{\symb{/}} &{\symb{{$-$}}} & \\
|
||||
\end{tabular}\\
|
||||
|
||||
\subsection*{Comments}
|
||||
Single-line comments begin with {\symb{{$-$}{$-$}}}. \\Multiple-line comments are enclosed with {\symb{\{{$-$}}} and {\symb{{$-$}\}}}.
|
||||
|
||||
\section*{The syntactic structure of GF}
|
||||
Non-terminals are enclosed between $\langle$ and $\rangle$.
|
||||
The symbols {\arrow} (production), {\delimit} (union)
|
||||
and {\emptyP} (empty rule) belong to the BNF notation.
|
||||
All other symbols are terminals.\\
|
||||
|
||||
\begin{tabular}{lll}
|
||||
{\nonterminal{Grammar}} & {\arrow} &{\nonterminal{ListModDef}} \\
|
||||
\end{tabular}\\
|
||||
|
||||
\begin{tabular}{lll}
|
||||
{\nonterminal{ListModDef}} & {\arrow} &{\emptyP} \\
|
||||
& {\delimit} &{\nonterminal{ModDef}} {\nonterminal{ListModDef}} \\
|
||||
\end{tabular}\\
|
||||
|
||||
\begin{tabular}{lll}
|
||||
{\nonterminal{ModDef}} & {\arrow} &{\nonterminal{ModDef}} {\terminal{;}} \\
|
||||
& {\delimit} &{\terminal{grammar}} {\nonterminal{Ident}} {\terminal{{$=$}}} {\terminal{\{}} {\terminal{abstract}} {\terminal{{$=$}}} {\nonterminal{Ident}} {\terminal{;}} {\nonterminal{ListConcSpec}} {\terminal{\}}} \\
|
||||
& {\delimit} &{\nonterminal{ComplMod}} {\nonterminal{ModType}} {\terminal{{$=$}}} {\nonterminal{ModBody}} \\
|
||||
\end{tabular}\\
|
||||
|
||||
\begin{tabular}{lll}
|
||||
{\nonterminal{ConcSpec}} & {\arrow} &{\nonterminal{Ident}} {\terminal{{$=$}}} {\nonterminal{ConcExp}} \\
|
||||
\end{tabular}\\
|
||||
|
||||
\begin{tabular}{lll}
|
||||
{\nonterminal{ListConcSpec}} & {\arrow} &{\emptyP} \\
|
||||
& {\delimit} &{\nonterminal{ConcSpec}} \\
|
||||
& {\delimit} &{\nonterminal{ConcSpec}} {\terminal{;}} {\nonterminal{ListConcSpec}} \\
|
||||
\end{tabular}\\
|
||||
|
||||
\begin{tabular}{lll}
|
||||
{\nonterminal{ConcExp}} & {\arrow} &{\nonterminal{Ident}} {\nonterminal{ListTransfer}} \\
|
||||
\end{tabular}\\
|
||||
|
||||
\begin{tabular}{lll}
|
||||
{\nonterminal{ListTransfer}} & {\arrow} &{\emptyP} \\
|
||||
& {\delimit} &{\nonterminal{Transfer}} {\nonterminal{ListTransfer}} \\
|
||||
\end{tabular}\\
|
||||
|
||||
\begin{tabular}{lll}
|
||||
{\nonterminal{Transfer}} & {\arrow} &{\terminal{(}} {\terminal{transfer}} {\terminal{in}} {\nonterminal{Open}} {\terminal{)}} \\
|
||||
& {\delimit} &{\terminal{(}} {\terminal{transfer}} {\terminal{out}} {\nonterminal{Open}} {\terminal{)}} \\
|
||||
\end{tabular}\\
|
||||
|
||||
\begin{tabular}{lll}
|
||||
{\nonterminal{ModType}} & {\arrow} &{\terminal{abstract}} {\nonterminal{Ident}} \\
|
||||
& {\delimit} &{\terminal{resource}} {\nonterminal{Ident}} \\
|
||||
& {\delimit} &{\terminal{interface}} {\nonterminal{Ident}} \\
|
||||
& {\delimit} &{\terminal{concrete}} {\nonterminal{Ident}} {\terminal{of}} {\nonterminal{Ident}} \\
|
||||
& {\delimit} &{\terminal{instance}} {\nonterminal{Ident}} {\terminal{of}} {\nonterminal{Ident}} \\
|
||||
& {\delimit} &{\terminal{transfer}} {\nonterminal{Ident}} {\terminal{:}} {\nonterminal{Open}} {\terminal{{$-$}{$>$}}} {\nonterminal{Open}} \\
|
||||
\end{tabular}\\
|
||||
|
||||
\begin{tabular}{lll}
|
||||
{\nonterminal{ModBody}} & {\arrow} &{\nonterminal{Extend}} {\nonterminal{Opens}} {\terminal{\{}} {\nonterminal{ListTopDef}} {\terminal{\}}} \\
|
||||
& {\delimit} &{\nonterminal{Ident}} {\terminal{with}} {\nonterminal{ListOpen}} \\
|
||||
& {\delimit} &{\nonterminal{ListIdent}} {\terminal{**}} {\nonterminal{Ident}} {\terminal{with}} {\nonterminal{ListOpen}} \\
|
||||
& {\delimit} &{\terminal{reuse}} {\nonterminal{Ident}} \\
|
||||
& {\delimit} &{\terminal{union}} {\nonterminal{ListIncluded}} \\
|
||||
\end{tabular}\\
|
||||
|
||||
\begin{tabular}{lll}
|
||||
{\nonterminal{ListTopDef}} & {\arrow} &{\emptyP} \\
|
||||
& {\delimit} &{\nonterminal{TopDef}} {\nonterminal{ListTopDef}} \\
|
||||
\end{tabular}\\
|
||||
|
||||
\begin{tabular}{lll}
|
||||
{\nonterminal{Extend}} & {\arrow} &{\nonterminal{ListIdent}} {\terminal{**}} \\
|
||||
& {\delimit} &{\emptyP} \\
|
||||
\end{tabular}\\
|
||||
|
||||
\begin{tabular}{lll}
|
||||
{\nonterminal{ListOpen}} & {\arrow} &{\emptyP} \\
|
||||
& {\delimit} &{\nonterminal{Open}} \\
|
||||
& {\delimit} &{\nonterminal{Open}} {\terminal{,}} {\nonterminal{ListOpen}} \\
|
||||
\end{tabular}\\
|
||||
|
||||
\begin{tabular}{lll}
|
||||
{\nonterminal{Opens}} & {\arrow} &{\emptyP} \\
|
||||
& {\delimit} &{\terminal{open}} {\nonterminal{ListOpen}} {\terminal{in}} \\
|
||||
\end{tabular}\\
|
||||
|
||||
\begin{tabular}{lll}
|
||||
{\nonterminal{Open}} & {\arrow} &{\nonterminal{Ident}} \\
|
||||
& {\delimit} &{\terminal{(}} {\nonterminal{QualOpen}} {\nonterminal{Ident}} {\terminal{)}} \\
|
||||
& {\delimit} &{\terminal{(}} {\nonterminal{QualOpen}} {\nonterminal{Ident}} {\terminal{{$=$}}} {\nonterminal{Ident}} {\terminal{)}} \\
|
||||
\end{tabular}\\
|
||||
|
||||
\begin{tabular}{lll}
|
||||
{\nonterminal{ComplMod}} & {\arrow} &{\emptyP} \\
|
||||
& {\delimit} &{\terminal{incomplete}} \\
|
||||
\end{tabular}\\
|
||||
|
||||
\begin{tabular}{lll}
|
||||
{\nonterminal{QualOpen}} & {\arrow} &{\emptyP} \\
|
||||
& {\delimit} &{\terminal{incomplete}} \\
|
||||
& {\delimit} &{\terminal{interface}} \\
|
||||
\end{tabular}\\
|
||||
|
||||
\begin{tabular}{lll}
|
||||
{\nonterminal{ListIncluded}} & {\arrow} &{\emptyP} \\
|
||||
& {\delimit} &{\nonterminal{Included}} \\
|
||||
& {\delimit} &{\nonterminal{Included}} {\terminal{,}} {\nonterminal{ListIncluded}} \\
|
||||
\end{tabular}\\
|
||||
|
||||
\begin{tabular}{lll}
|
||||
{\nonterminal{Included}} & {\arrow} &{\nonterminal{Ident}} \\
|
||||
& {\delimit} &{\nonterminal{Ident}} {\terminal{[}} {\nonterminal{ListIdent}} {\terminal{]}} \\
|
||||
\end{tabular}\\
|
||||
|
||||
\begin{tabular}{lll}
|
||||
{\nonterminal{Def}} & {\arrow} &{\nonterminal{ListName}} {\terminal{:}} {\nonterminal{Exp}} \\
|
||||
& {\delimit} &{\nonterminal{ListName}} {\terminal{{$=$}}} {\nonterminal{Exp}} \\
|
||||
& {\delimit} &{\nonterminal{Name}} {\nonterminal{ListPatt}} {\terminal{{$=$}}} {\nonterminal{Exp}} \\
|
||||
& {\delimit} &{\nonterminal{ListName}} {\terminal{:}} {\nonterminal{Exp}} {\terminal{{$=$}}} {\nonterminal{Exp}} \\
|
||||
\end{tabular}\\
|
||||
|
||||
\begin{tabular}{lll}
|
||||
{\nonterminal{TopDef}} & {\arrow} &{\terminal{cat}} {\nonterminal{ListCatDef}} \\
|
||||
& {\delimit} &{\terminal{fun}} {\nonterminal{ListFunDef}} \\
|
||||
& {\delimit} &{\terminal{data}} {\nonterminal{ListFunDef}} \\
|
||||
& {\delimit} &{\terminal{def}} {\nonterminal{ListDef}} \\
|
||||
& {\delimit} &{\terminal{data}} {\nonterminal{ListDataDef}} \\
|
||||
& {\delimit} &{\terminal{transfer}} {\nonterminal{ListDef}} \\
|
||||
& {\delimit} &{\terminal{param}} {\nonterminal{ListParDef}} \\
|
||||
& {\delimit} &{\terminal{oper}} {\nonterminal{ListDef}} \\
|
||||
& {\delimit} &{\terminal{lincat}} {\nonterminal{ListPrintDef}} \\
|
||||
& {\delimit} &{\terminal{lindef}} {\nonterminal{ListDef}} \\
|
||||
& {\delimit} &{\terminal{lin}} {\nonterminal{ListDef}} \\
|
||||
& {\delimit} &{\terminal{printname}} {\terminal{cat}} {\nonterminal{ListPrintDef}} \\
|
||||
& {\delimit} &{\terminal{printname}} {\terminal{fun}} {\nonterminal{ListPrintDef}} \\
|
||||
& {\delimit} &{\terminal{flags}} {\nonterminal{ListFlagDef}} \\
|
||||
& {\delimit} &{\terminal{printname}} {\nonterminal{ListPrintDef}} \\
|
||||
& {\delimit} &{\terminal{lintype}} {\nonterminal{ListDef}} \\
|
||||
& {\delimit} &{\terminal{pattern}} {\nonterminal{ListDef}} \\
|
||||
& {\delimit} &{\terminal{package}} {\nonterminal{Ident}} {\terminal{{$=$}}} {\terminal{\{}} {\nonterminal{ListTopDef}} {\terminal{\}}} {\terminal{;}} \\
|
||||
& {\delimit} &{\terminal{var}} {\nonterminal{ListDef}} \\
|
||||
& {\delimit} &{\terminal{tokenizer}} {\nonterminal{Ident}} {\terminal{;}} \\
|
||||
\end{tabular}\\
|
||||
|
||||
\begin{tabular}{lll}
|
||||
{\nonterminal{CatDef}} & {\arrow} &{\nonterminal{Ident}} {\nonterminal{ListDDecl}} \\
|
||||
& {\delimit} &{\terminal{[}} {\nonterminal{Ident}} {\nonterminal{ListDDecl}} {\terminal{]}} \\
|
||||
& {\delimit} &{\terminal{[}} {\nonterminal{Ident}} {\nonterminal{ListDDecl}} {\terminal{]}} {\terminal{\{}} {\nonterminal{Integer}} {\terminal{\}}} \\
|
||||
\end{tabular}\\
|
||||
|
||||
\begin{tabular}{lll}
|
||||
{\nonterminal{FunDef}} & {\arrow} &{\nonterminal{ListIdent}} {\terminal{:}} {\nonterminal{Exp}} \\
|
||||
\end{tabular}\\
|
||||
|
||||
\begin{tabular}{lll}
|
||||
{\nonterminal{DataDef}} & {\arrow} &{\nonterminal{Ident}} {\terminal{{$=$}}} {\nonterminal{ListDataConstr}} \\
|
||||
\end{tabular}\\
|
||||
|
||||
\begin{tabular}{lll}
|
||||
{\nonterminal{DataConstr}} & {\arrow} &{\nonterminal{Ident}} \\
|
||||
& {\delimit} &{\nonterminal{Ident}} {\terminal{.}} {\nonterminal{Ident}} \\
|
||||
\end{tabular}\\
|
||||
|
||||
\begin{tabular}{lll}
|
||||
{\nonterminal{ListDataConstr}} & {\arrow} &{\emptyP} \\
|
||||
& {\delimit} &{\nonterminal{DataConstr}} \\
|
||||
& {\delimit} &{\nonterminal{DataConstr}} {\terminal{{$|$}}} {\nonterminal{ListDataConstr}} \\
|
||||
\end{tabular}\\
|
||||
|
||||
\begin{tabular}{lll}
|
||||
{\nonterminal{ParDef}} & {\arrow} &{\nonterminal{Ident}} {\terminal{{$=$}}} {\nonterminal{ListParConstr}} \\
|
||||
& {\delimit} &{\nonterminal{Ident}} {\terminal{{$=$}}} {\terminal{(}} {\terminal{in}} {\nonterminal{Ident}} {\terminal{)}} \\
|
||||
& {\delimit} &{\nonterminal{Ident}} \\
|
||||
\end{tabular}\\
|
||||
|
||||
\begin{tabular}{lll}
|
||||
{\nonterminal{ParConstr}} & {\arrow} &{\nonterminal{Ident}} {\nonterminal{ListDDecl}} \\
|
||||
\end{tabular}\\
|
||||
|
||||
\begin{tabular}{lll}
|
||||
{\nonterminal{PrintDef}} & {\arrow} &{\nonterminal{ListName}} {\terminal{{$=$}}} {\nonterminal{Exp}} \\
|
||||
\end{tabular}\\
|
||||
|
||||
\begin{tabular}{lll}
|
||||
{\nonterminal{FlagDef}} & {\arrow} &{\nonterminal{Ident}} {\terminal{{$=$}}} {\nonterminal{Ident}} \\
|
||||
\end{tabular}\\
|
||||
|
||||
\begin{tabular}{lll}
|
||||
{\nonterminal{ListDef}} & {\arrow} &{\nonterminal{Def}} {\terminal{;}} \\
|
||||
& {\delimit} &{\nonterminal{Def}} {\terminal{;}} {\nonterminal{ListDef}} \\
|
||||
\end{tabular}\\
|
||||
|
||||
\begin{tabular}{lll}
|
||||
{\nonterminal{ListCatDef}} & {\arrow} &{\nonterminal{CatDef}} {\terminal{;}} \\
|
||||
& {\delimit} &{\nonterminal{CatDef}} {\terminal{;}} {\nonterminal{ListCatDef}} \\
|
||||
\end{tabular}\\
|
||||
|
||||
\begin{tabular}{lll}
|
||||
{\nonterminal{ListFunDef}} & {\arrow} &{\nonterminal{FunDef}} {\terminal{;}} \\
|
||||
& {\delimit} &{\nonterminal{FunDef}} {\terminal{;}} {\nonterminal{ListFunDef}} \\
|
||||
\end{tabular}\\
|
||||
|
||||
\begin{tabular}{lll}
|
||||
{\nonterminal{ListDataDef}} & {\arrow} &{\nonterminal{DataDef}} {\terminal{;}} \\
|
||||
& {\delimit} &{\nonterminal{DataDef}} {\terminal{;}} {\nonterminal{ListDataDef}} \\
|
||||
\end{tabular}\\
|
||||
|
||||
\begin{tabular}{lll}
|
||||
{\nonterminal{ListParDef}} & {\arrow} &{\nonterminal{ParDef}} {\terminal{;}} \\
|
||||
& {\delimit} &{\nonterminal{ParDef}} {\terminal{;}} {\nonterminal{ListParDef}} \\
|
||||
\end{tabular}\\
|
||||
|
||||
\begin{tabular}{lll}
|
||||
{\nonterminal{ListPrintDef}} & {\arrow} &{\nonterminal{PrintDef}} {\terminal{;}} \\
|
||||
& {\delimit} &{\nonterminal{PrintDef}} {\terminal{;}} {\nonterminal{ListPrintDef}} \\
|
||||
\end{tabular}\\
|
||||
|
||||
\begin{tabular}{lll}
|
||||
{\nonterminal{ListFlagDef}} & {\arrow} &{\nonterminal{FlagDef}} {\terminal{;}} \\
|
||||
& {\delimit} &{\nonterminal{FlagDef}} {\terminal{;}} {\nonterminal{ListFlagDef}} \\
|
||||
\end{tabular}\\
|
||||
|
||||
\begin{tabular}{lll}
|
||||
{\nonterminal{ListParConstr}} & {\arrow} &{\emptyP} \\
|
||||
& {\delimit} &{\nonterminal{ParConstr}} \\
|
||||
& {\delimit} &{\nonterminal{ParConstr}} {\terminal{{$|$}}} {\nonterminal{ListParConstr}} \\
|
||||
\end{tabular}\\
|
||||
|
||||
\begin{tabular}{lll}
|
||||
{\nonterminal{ListIdent}} & {\arrow} &{\nonterminal{Ident}} \\
|
||||
& {\delimit} &{\nonterminal{Ident}} {\terminal{,}} {\nonterminal{ListIdent}} \\
|
||||
\end{tabular}\\
|
||||
|
||||
\begin{tabular}{lll}
|
||||
{\nonterminal{Name}} & {\arrow} &{\nonterminal{Ident}} \\
|
||||
& {\delimit} &{\terminal{[}} {\nonterminal{Ident}} {\terminal{]}} \\
|
||||
\end{tabular}\\
|
||||
|
||||
\begin{tabular}{lll}
|
||||
{\nonterminal{ListName}} & {\arrow} &{\nonterminal{Name}} \\
|
||||
& {\delimit} &{\nonterminal{Name}} {\terminal{,}} {\nonterminal{ListName}} \\
|
||||
\end{tabular}\\
|
||||
|
||||
\begin{tabular}{lll}
|
||||
{\nonterminal{LocDef}} & {\arrow} &{\nonterminal{ListIdent}} {\terminal{:}} {\nonterminal{Exp}} \\
|
||||
& {\delimit} &{\nonterminal{ListIdent}} {\terminal{{$=$}}} {\nonterminal{Exp}} \\
|
||||
& {\delimit} &{\nonterminal{ListIdent}} {\terminal{:}} {\nonterminal{Exp}} {\terminal{{$=$}}} {\nonterminal{Exp}} \\
|
||||
\end{tabular}\\
|
||||
|
||||
\begin{tabular}{lll}
|
||||
{\nonterminal{ListLocDef}} & {\arrow} &{\emptyP} \\
|
||||
& {\delimit} &{\nonterminal{LocDef}} \\
|
||||
& {\delimit} &{\nonterminal{LocDef}} {\terminal{;}} {\nonterminal{ListLocDef}} \\
|
||||
\end{tabular}\\
|
||||
|
||||
\begin{tabular}{lll}
|
||||
{\nonterminal{Exp4}} & {\arrow} &{\nonterminal{Ident}} \\
|
||||
& {\delimit} &{\terminal{\{}} {\nonterminal{Ident}} {\terminal{\}}} \\
|
||||
& {\delimit} &{\terminal{\%}} {\nonterminal{Ident}} {\terminal{\%}} \\
|
||||
& {\delimit} &{\nonterminal{Sort}} \\
|
||||
& {\delimit} &{\nonterminal{String}} \\
|
||||
& {\delimit} &{\nonterminal{Integer}} \\
|
||||
& {\delimit} &{\terminal{?}} \\
|
||||
& {\delimit} &{\terminal{[}} {\terminal{]}} \\
|
||||
& {\delimit} &{\terminal{data}} \\
|
||||
& {\delimit} &{\terminal{[}} {\nonterminal{Ident}} {\nonterminal{Exps}} {\terminal{]}} \\
|
||||
& {\delimit} &{\terminal{[}} {\nonterminal{String}} {\terminal{]}} \\
|
||||
& {\delimit} &{\terminal{\{}} {\nonterminal{ListLocDef}} {\terminal{\}}} \\
|
||||
& {\delimit} &{\terminal{{$<$}}} {\nonterminal{ListTupleComp}} {\terminal{{$>$}}} \\
|
||||
& {\delimit} &{\terminal{(}} {\terminal{in}} {\nonterminal{Ident}} {\terminal{)}} \\
|
||||
& {\delimit} &{\terminal{{$<$}}} {\nonterminal{Exp}} {\terminal{:}} {\nonterminal{Exp}} {\terminal{{$>$}}} \\
|
||||
& {\delimit} &{\terminal{(}} {\nonterminal{Exp}} {\terminal{)}} \\
|
||||
& {\delimit} &{\nonterminal{LString}} \\
|
||||
\end{tabular}\\
|
||||
|
||||
\begin{tabular}{lll}
|
||||
{\nonterminal{Exp3}} & {\arrow} &{\nonterminal{Exp3}} {\terminal{.}} {\nonterminal{Label}} \\
|
||||
& {\delimit} &{\terminal{\{}} {\nonterminal{Ident}} {\terminal{.}} {\nonterminal{Ident}} {\terminal{\}}} \\
|
||||
& {\delimit} &{\terminal{\%}} {\nonterminal{Ident}} {\terminal{.}} {\nonterminal{Ident}} {\terminal{\%}} \\
|
||||
& {\delimit} &{\nonterminal{Exp4}} \\
|
||||
\end{tabular}\\
|
||||
|
||||
\begin{tabular}{lll}
|
||||
{\nonterminal{Exp2}} & {\arrow} &{\nonterminal{Exp2}} {\nonterminal{Exp3}} \\
|
||||
& {\delimit} &{\terminal{table}} {\terminal{\{}} {\nonterminal{ListCase}} {\terminal{\}}} \\
|
||||
& {\delimit} &{\terminal{table}} {\nonterminal{Exp4}} {\terminal{\{}} {\nonterminal{ListCase}} {\terminal{\}}} \\
|
||||
& {\delimit} &{\terminal{table}} {\nonterminal{Exp4}} {\terminal{[}} {\nonterminal{ListExp}} {\terminal{]}} \\
|
||||
& {\delimit} &{\terminal{case}} {\nonterminal{Exp}} {\terminal{of}} {\terminal{\{}} {\nonterminal{ListCase}} {\terminal{\}}} \\
|
||||
& {\delimit} &{\terminal{variants}} {\terminal{\{}} {\nonterminal{ListExp}} {\terminal{\}}} \\
|
||||
& {\delimit} &{\terminal{pre}} {\terminal{\{}} {\nonterminal{Exp}} {\terminal{;}} {\nonterminal{ListAltern}} {\terminal{\}}} \\
|
||||
& {\delimit} &{\terminal{strs}} {\terminal{\{}} {\nonterminal{ListExp}} {\terminal{\}}} \\
|
||||
& {\delimit} &{\nonterminal{Ident}} {\terminal{@}} {\nonterminal{Exp4}} \\
|
||||
& {\delimit} &{\nonterminal{Exp3}} \\
|
||||
& {\delimit} &{\terminal{Lin}} {\nonterminal{Ident}} \\
|
||||
\end{tabular}\\
|
||||
|
||||
\begin{tabular}{lll}
|
||||
{\nonterminal{Exp1}} & {\arrow} &{\nonterminal{Exp1}} {\terminal{!}} {\nonterminal{Exp2}} \\
|
||||
& {\delimit} &{\nonterminal{Exp1}} {\terminal{*}} {\nonterminal{Exp2}} \\
|
||||
& {\delimit} &{\nonterminal{Exp1}} {\terminal{**}} {\nonterminal{Exp2}} \\
|
||||
& {\delimit} &{\nonterminal{Exp2}} \\
|
||||
\end{tabular}\\
|
||||
|
||||
\begin{tabular}{lll}
|
||||
{\nonterminal{Exp}} & {\arrow} &{\terminal{$\backslash$}} {\nonterminal{ListBind}} {\terminal{{$-$}{$>$}}} {\nonterminal{Exp}} \\
|
||||
& {\delimit} &{\terminal{$\backslash$}} {\terminal{$\backslash$}} {\nonterminal{ListBind}} {\terminal{{$=$}{$>$}}} {\nonterminal{Exp}} \\
|
||||
& {\delimit} &{\nonterminal{Decl}} {\terminal{{$-$}{$>$}}} {\nonterminal{Exp}} \\
|
||||
& {\delimit} &{\nonterminal{Exp1}} {\terminal{{$=$}{$>$}}} {\nonterminal{Exp}} \\
|
||||
& {\delimit} &{\nonterminal{Exp1}} {\terminal{{$+$}{$+$}}} {\nonterminal{Exp}} \\
|
||||
& {\delimit} &{\nonterminal{Exp1}} {\terminal{{$+$}}} {\nonterminal{Exp}} \\
|
||||
& {\delimit} &{\terminal{let}} {\terminal{\{}} {\nonterminal{ListLocDef}} {\terminal{\}}} {\terminal{in}} {\nonterminal{Exp}} \\
|
||||
& {\delimit} &{\terminal{let}} {\nonterminal{ListLocDef}} {\terminal{in}} {\nonterminal{Exp}} \\
|
||||
& {\delimit} &{\nonterminal{Exp1}} {\terminal{where}} {\terminal{\{}} {\nonterminal{ListLocDef}} {\terminal{\}}} \\
|
||||
& {\delimit} &{\terminal{fn}} {\terminal{\{}} {\nonterminal{ListEquation}} {\terminal{\}}} \\
|
||||
& {\delimit} &{\nonterminal{Exp1}} \\
|
||||
\end{tabular}\\
|
||||
|
||||
\begin{tabular}{lll}
|
||||
{\nonterminal{ListExp}} & {\arrow} &{\emptyP} \\
|
||||
& {\delimit} &{\nonterminal{Exp}} \\
|
||||
& {\delimit} &{\nonterminal{Exp}} {\terminal{;}} {\nonterminal{ListExp}} \\
|
||||
\end{tabular}\\
|
||||
|
||||
\begin{tabular}{lll}
|
||||
{\nonterminal{Exps}} & {\arrow} &{\emptyP} \\
|
||||
& {\delimit} &{\nonterminal{Exp4}} {\nonterminal{Exps}} \\
|
||||
\end{tabular}\\
|
||||
|
||||
\begin{tabular}{lll}
|
||||
{\nonterminal{Patt1}} & {\arrow} &{\terminal{\_}} \\
|
||||
& {\delimit} &{\nonterminal{Ident}} \\
|
||||
& {\delimit} &{\terminal{\{}} {\nonterminal{Ident}} {\terminal{\}}} \\
|
||||
& {\delimit} &{\nonterminal{Ident}} {\terminal{.}} {\nonterminal{Ident}} \\
|
||||
& {\delimit} &{\nonterminal{Integer}} \\
|
||||
& {\delimit} &{\nonterminal{String}} \\
|
||||
& {\delimit} &{\terminal{\{}} {\nonterminal{ListPattAss}} {\terminal{\}}} \\
|
||||
& {\delimit} &{\terminal{{$<$}}} {\nonterminal{ListPattTupleComp}} {\terminal{{$>$}}} \\
|
||||
& {\delimit} &{\terminal{(}} {\nonterminal{Patt}} {\terminal{)}} \\
|
||||
\end{tabular}\\
|
||||
|
||||
\begin{tabular}{lll}
|
||||
{\nonterminal{Patt}} & {\arrow} &{\nonterminal{Ident}} {\nonterminal{ListPatt}} \\
|
||||
& {\delimit} &{\nonterminal{Ident}} {\terminal{.}} {\nonterminal{Ident}} {\nonterminal{ListPatt}} \\
|
||||
& {\delimit} &{\nonterminal{Patt1}} \\
|
||||
\end{tabular}\\
|
||||
|
||||
\begin{tabular}{lll}
|
||||
{\nonterminal{PattAss}} & {\arrow} &{\nonterminal{ListIdent}} {\terminal{{$=$}}} {\nonterminal{Patt}} \\
|
||||
\end{tabular}\\
|
||||
|
||||
\begin{tabular}{lll}
|
||||
{\nonterminal{Label}} & {\arrow} &{\nonterminal{Ident}} \\
|
||||
& {\delimit} &{\terminal{\$}} {\nonterminal{Integer}} \\
|
||||
\end{tabular}\\
|
||||
|
||||
\begin{tabular}{lll}
|
||||
{\nonterminal{Sort}} & {\arrow} &{\terminal{Type}} \\
|
||||
& {\delimit} &{\terminal{PType}} \\
|
||||
& {\delimit} &{\terminal{Tok}} \\
|
||||
& {\delimit} &{\terminal{Str}} \\
|
||||
& {\delimit} &{\terminal{Strs}} \\
|
||||
\end{tabular}\\
|
||||
|
||||
\begin{tabular}{lll}
|
||||
{\nonterminal{ListPattAss}} & {\arrow} &{\emptyP} \\
|
||||
& {\delimit} &{\nonterminal{PattAss}} \\
|
||||
& {\delimit} &{\nonterminal{PattAss}} {\terminal{;}} {\nonterminal{ListPattAss}} \\
|
||||
\end{tabular}\\
|
||||
|
||||
\begin{tabular}{lll}
|
||||
{\nonterminal{PattAlt}} & {\arrow} &{\nonterminal{Patt}} \\
|
||||
\end{tabular}\\
|
||||
|
||||
\begin{tabular}{lll}
|
||||
{\nonterminal{ListPatt}} & {\arrow} &{\nonterminal{Patt1}} \\
|
||||
& {\delimit} &{\nonterminal{Patt1}} {\nonterminal{ListPatt}} \\
|
||||
\end{tabular}\\
|
||||
|
||||
\begin{tabular}{lll}
|
||||
{\nonterminal{ListPattAlt}} & {\arrow} &{\nonterminal{PattAlt}} \\
|
||||
& {\delimit} &{\nonterminal{PattAlt}} {\terminal{{$|$}}} {\nonterminal{ListPattAlt}} \\
|
||||
\end{tabular}\\
|
||||
|
||||
\begin{tabular}{lll}
|
||||
{\nonterminal{Bind}} & {\arrow} &{\nonterminal{Ident}} \\
|
||||
& {\delimit} &{\terminal{\_}} \\
|
||||
\end{tabular}\\
|
||||
|
||||
\begin{tabular}{lll}
|
||||
{\nonterminal{ListBind}} & {\arrow} &{\emptyP} \\
|
||||
& {\delimit} &{\nonterminal{Bind}} \\
|
||||
& {\delimit} &{\nonterminal{Bind}} {\terminal{,}} {\nonterminal{ListBind}} \\
|
||||
\end{tabular}\\
|
||||
|
||||
\begin{tabular}{lll}
|
||||
{\nonterminal{Decl}} & {\arrow} &{\terminal{(}} {\nonterminal{ListBind}} {\terminal{:}} {\nonterminal{Exp}} {\terminal{)}} \\
|
||||
& {\delimit} &{\nonterminal{Exp2}} \\
|
||||
\end{tabular}\\
|
||||
|
||||
\begin{tabular}{lll}
|
||||
{\nonterminal{TupleComp}} & {\arrow} &{\nonterminal{Exp}} \\
|
||||
\end{tabular}\\
|
||||
|
||||
\begin{tabular}{lll}
|
||||
{\nonterminal{PattTupleComp}} & {\arrow} &{\nonterminal{Patt}} \\
|
||||
\end{tabular}\\
|
||||
|
||||
\begin{tabular}{lll}
|
||||
{\nonterminal{ListTupleComp}} & {\arrow} &{\emptyP} \\
|
||||
& {\delimit} &{\nonterminal{TupleComp}} \\
|
||||
& {\delimit} &{\nonterminal{TupleComp}} {\terminal{,}} {\nonterminal{ListTupleComp}} \\
|
||||
\end{tabular}\\
|
||||
|
||||
\begin{tabular}{lll}
|
||||
{\nonterminal{ListPattTupleComp}} & {\arrow} &{\emptyP} \\
|
||||
& {\delimit} &{\nonterminal{PattTupleComp}} \\
|
||||
& {\delimit} &{\nonterminal{PattTupleComp}} {\terminal{,}} {\nonterminal{ListPattTupleComp}} \\
|
||||
\end{tabular}\\
|
||||
|
||||
\begin{tabular}{lll}
|
||||
{\nonterminal{Case}} & {\arrow} &{\nonterminal{ListPattAlt}} {\terminal{{$=$}{$>$}}} {\nonterminal{Exp}} \\
|
||||
\end{tabular}\\
|
||||
|
||||
\begin{tabular}{lll}
|
||||
{\nonterminal{ListCase}} & {\arrow} &{\nonterminal{Case}} \\
|
||||
& {\delimit} &{\nonterminal{Case}} {\terminal{;}} {\nonterminal{ListCase}} \\
|
||||
\end{tabular}\\
|
||||
|
||||
\begin{tabular}{lll}
|
||||
{\nonterminal{Equation}} & {\arrow} &{\nonterminal{ListPatt}} {\terminal{{$-$}{$>$}}} {\nonterminal{Exp}} \\
|
||||
\end{tabular}\\
|
||||
|
||||
\begin{tabular}{lll}
|
||||
{\nonterminal{ListEquation}} & {\arrow} &{\emptyP} \\
|
||||
& {\delimit} &{\nonterminal{Equation}} \\
|
||||
& {\delimit} &{\nonterminal{Equation}} {\terminal{;}} {\nonterminal{ListEquation}} \\
|
||||
\end{tabular}\\
|
||||
|
||||
\begin{tabular}{lll}
|
||||
{\nonterminal{Altern}} & {\arrow} &{\nonterminal{Exp}} {\terminal{/}} {\nonterminal{Exp}} \\
|
||||
\end{tabular}\\
|
||||
|
||||
\begin{tabular}{lll}
|
||||
{\nonterminal{ListAltern}} & {\arrow} &{\emptyP} \\
|
||||
& {\delimit} &{\nonterminal{Altern}} \\
|
||||
& {\delimit} &{\nonterminal{Altern}} {\terminal{;}} {\nonterminal{ListAltern}} \\
|
||||
\end{tabular}\\
|
||||
|
||||
\begin{tabular}{lll}
|
||||
{\nonterminal{DDecl}} & {\arrow} &{\terminal{(}} {\nonterminal{ListBind}} {\terminal{:}} {\nonterminal{Exp}} {\terminal{)}} \\
|
||||
& {\delimit} &{\nonterminal{Exp4}} \\
|
||||
\end{tabular}\\
|
||||
|
||||
\begin{tabular}{lll}
|
||||
{\nonterminal{ListDDecl}} & {\arrow} &{\emptyP} \\
|
||||
& {\delimit} &{\nonterminal{DDecl}} {\nonterminal{ListDDecl}} \\
|
||||
\end{tabular}\\
|
||||
|
||||
\begin{tabular}{lll}
|
||||
{\nonterminal{OldGrammar}} & {\arrow} &{\nonterminal{Include}} {\nonterminal{ListTopDef}} \\
|
||||
\end{tabular}\\
|
||||
|
||||
\begin{tabular}{lll}
|
||||
{\nonterminal{Include}} & {\arrow} &{\emptyP} \\
|
||||
& {\delimit} &{\terminal{include}} {\nonterminal{ListFileName}} \\
|
||||
\end{tabular}\\
|
||||
|
||||
\begin{tabular}{lll}
|
||||
{\nonterminal{FileName}} & {\arrow} &{\nonterminal{String}} \\
|
||||
& {\delimit} &{\nonterminal{Ident}} \\
|
||||
& {\delimit} &{\terminal{/}} {\nonterminal{FileName}} \\
|
||||
& {\delimit} &{\terminal{.}} {\nonterminal{FileName}} \\
|
||||
& {\delimit} &{\terminal{{$-$}}} {\nonterminal{FileName}} \\
|
||||
& {\delimit} &{\nonterminal{Ident}} {\nonterminal{FileName}} \\
|
||||
\end{tabular}\\
|
||||
|
||||
\begin{tabular}{lll}
|
||||
{\nonterminal{ListFileName}} & {\arrow} &{\nonterminal{FileName}} {\terminal{;}} \\
|
||||
& {\delimit} &{\nonterminal{FileName}} {\terminal{;}} {\nonterminal{ListFileName}} \\
|
||||
\end{tabular}\\
|
||||
|
||||
|
||||
|
||||
\end{document}
|
||||
|
||||
BIN
doc/German.png
|
Before Width: | Height: | Size: 20 KiB |
@@ -1,75 +0,0 @@
|
||||
digraph {
|
||||
|
||||
size = "12,8" ;
|
||||
|
||||
Lang [style = "solid", shape = "ellipse", URL = "Lang.gf"];
|
||||
|
||||
Lang -> Grammar [style = "solid"];
|
||||
Lang -> Lexicon [style = "solid"];
|
||||
|
||||
Grammar [style = "solid", shape = "ellipse", URL = "Lang.gf"];
|
||||
|
||||
|
||||
Grammar -> Noun [style = "solid"];
|
||||
Grammar -> Verb [style = "solid"];
|
||||
Grammar -> Adjective [style = "solid"];
|
||||
Grammar -> Adverb [style = "solid"];
|
||||
Grammar -> Numeral [style = "solid"];
|
||||
Grammar -> Sentence [style = "solid"];
|
||||
Grammar -> Question [style = "solid"];
|
||||
Grammar -> Relative [style = "solid"];
|
||||
Grammar -> Conjunction [style = "solid"];
|
||||
Grammar -> Phrase [style = "solid"];
|
||||
Grammar -> Text [style = "solid"];
|
||||
Grammar -> Idiom [style = "solid"];
|
||||
Grammar -> Structural [style = "solid"];
|
||||
|
||||
|
||||
Noun [style = "solid", shape = "ellipse", URL = "Noun.gf"];
|
||||
Noun -> Cat [style = "solid"];
|
||||
|
||||
Verb [style = "solid", shape = "ellipse", URL = "Verb.gf"];
|
||||
Verb -> Cat [style = "solid"];
|
||||
|
||||
Adjective [style = "solid", shape = "ellipse", URL = "Adjective.gf"];
|
||||
Adjective -> Cat [style = "solid"];
|
||||
|
||||
Adverb [style = "solid", shape = "ellipse", URL = "Adverb.gf"];
|
||||
Adverb -> Cat [style = "solid"];
|
||||
|
||||
Numeral [style = "solid", shape = "ellipse", URL = "Numeral.gf"];
|
||||
Numeral -> Cat [style = "solid"];
|
||||
|
||||
Sentence [style = "solid", shape = "ellipse", URL = "Sentence.gf"];
|
||||
Sentence -> Cat [style = "solid"];
|
||||
|
||||
Question [style = "solid", shape = "ellipse", URL = "Question.gf"];
|
||||
Question -> Cat [style = "solid"];
|
||||
|
||||
Relative [style = "solid", shape = "ellipse", URL = "Relative.gf"];
|
||||
Relative -> Cat [style = "solid"];
|
||||
|
||||
Conjunction [style = "solid", shape = "ellipse", URL = "Conjunction.gf"];
|
||||
Conjunction -> Cat [style = "solid"];
|
||||
|
||||
Phrase [style = "solid", shape = "ellipse", URL = "Phrase.gf"];
|
||||
Phrase -> Cat [style = "solid"];
|
||||
|
||||
Text [style = "solid", shape = "ellipse", URL = "Phrase.gf"];
|
||||
Text -> Cat [style = "solid"];
|
||||
|
||||
Idiom [style = "solid", shape = "ellipse", URL = "Phrase.gf"];
|
||||
Idiom -> Cat [style = "solid"];
|
||||
|
||||
Structural [style = "solid", shape = "ellipse", URL = "Structural.gf"];
|
||||
Structural -> Cat [style = "solid"];
|
||||
|
||||
Lexicon [style = "solid", shape = "ellipse", URL = "Lexicon.gf"];
|
||||
Lexicon -> Cat [style = "solid"];
|
||||
|
||||
Cat [style = "solid", shape = "ellipse", URL = "Cat.gf"];
|
||||
Cat -> Common [style = "solid"];
|
||||
|
||||
Common [style = "solid", shape = "ellipse", URL = "Tense.gf"];
|
||||
|
||||
}
|
||||
BIN
doc/Grammar.png
|
Before Width: | Height: | Size: 77 KiB |
@@ -1,967 +0,0 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
||||
<HTML>
|
||||
<HEAD>
|
||||
<META NAME="generator" CONTENT="http://txt2tags.sf.net">
|
||||
<TITLE>Resource grammar writing HOWTO</TITLE>
|
||||
</HEAD><BODY BGCOLOR="white" TEXT="black">
|
||||
<P ALIGN="center"><CENTER><H1>Resource grammar writing HOWTO</H1>
|
||||
<FONT SIZE="4">
|
||||
<I>Author: Aarne Ranta <aarne (at) cs.chalmers.se></I><BR>
|
||||
Last update: Mon Sep 22 14:28:01 2008
|
||||
</FONT></CENTER>
|
||||
|
||||
<P></P>
|
||||
<HR NOSHADE SIZE=1>
|
||||
<P></P>
|
||||
<UL>
|
||||
<LI><A HREF="#toc1">The resource grammar structure</A>
|
||||
<UL>
|
||||
<LI><A HREF="#toc2">Library API modules</A>
|
||||
<LI><A HREF="#toc3">Phrase category modules</A>
|
||||
<LI><A HREF="#toc4">Infrastructure modules</A>
|
||||
<LI><A HREF="#toc5">Lexical modules</A>
|
||||
</UL>
|
||||
<LI><A HREF="#toc6">Language-dependent syntax modules</A>
|
||||
<UL>
|
||||
<LI><A HREF="#toc7">The present-tense fragment</A>
|
||||
</UL>
|
||||
<LI><A HREF="#toc8">Phases of the work</A>
|
||||
<UL>
|
||||
<LI><A HREF="#toc9">Putting up a directory</A>
|
||||
<LI><A HREF="#toc10">Direction of work</A>
|
||||
<LI><A HREF="#toc11">The develop-test cycle</A>
|
||||
<LI><A HREF="#toc12">Auxiliary modules</A>
|
||||
<LI><A HREF="#toc13">Morphology and lexicon</A>
|
||||
<LI><A HREF="#toc14">Lock fields</A>
|
||||
<LI><A HREF="#toc15">Lexicon construction</A>
|
||||
</UL>
|
||||
<LI><A HREF="#toc16">Lexicon extension</A>
|
||||
<UL>
|
||||
<LI><A HREF="#toc17">The irregularity lexicon</A>
|
||||
<LI><A HREF="#toc18">Lexicon extraction from a word list</A>
|
||||
<LI><A HREF="#toc19">Lexicon extraction from raw text data</A>
|
||||
<LI><A HREF="#toc20">Bootstrapping with smart paradigms</A>
|
||||
</UL>
|
||||
<LI><A HREF="#toc21">Extending the resource grammar API</A>
|
||||
<LI><A HREF="#toc22">Using parametrized modules</A>
|
||||
<UL>
|
||||
<LI><A HREF="#toc23">Writing an instance of parametrized resource grammar implementation</A>
|
||||
<LI><A HREF="#toc24">Parametrizing a resource grammar implementation</A>
|
||||
</UL>
|
||||
<LI><A HREF="#toc25">Character encoding and transliterations</A>
|
||||
<LI><A HREF="#toc26">Coding conventions in GF</A>
|
||||
<LI><A HREF="#toc27">Transliterations</A>
|
||||
</UL>
|
||||
|
||||
<P></P>
|
||||
<HR NOSHADE SIZE=1>
|
||||
<P></P>
|
||||
<P>
|
||||
<B>History</B>
|
||||
</P>
|
||||
<P>
|
||||
September 2008: updated for Version 1.5.
|
||||
</P>
|
||||
<P>
|
||||
October 2007: updated for Version 1.2.
|
||||
</P>
|
||||
<P>
|
||||
January 2006: first version.
|
||||
</P>
|
||||
<P>
|
||||
The purpose of this document is to tell how to implement the GF
|
||||
resource grammar API for a new language. We will <I>not</I> cover how
|
||||
to use the resource grammar, nor how to change the API. But we
|
||||
will give some hints how to extend the API.
|
||||
</P>
|
||||
<P>
|
||||
A manual for using the resource grammar is found in
|
||||
</P>
|
||||
<P>
|
||||
<A HREF="../lib/resource/doc/synopsis.html"><CODE>www.cs.chalmers.se/Cs/Research/Language-technology/GF/lib/resource/doc/synopsis.html</CODE></A>.
|
||||
</P>
|
||||
<P>
|
||||
A tutorial on GF, also introducing the idea of resource grammars, is found in
|
||||
</P>
|
||||
<P>
|
||||
<A HREF="./gf-tutorial.html"><CODE>www.cs.chalmers.se/Cs/Research/Language-technology/GF/doc/gf-tutorial.html</CODE></A>.
|
||||
</P>
|
||||
<P>
|
||||
This document concerns the API v. 1.5, while the current stable release is 1.4.
|
||||
You can find the code for the stable release in
|
||||
</P>
|
||||
<P>
|
||||
<A HREF="../lib/resource"><CODE>www.cs.chalmers.se/Cs/Research/Language-technology/GF/lib/resource/</CODE></A>
|
||||
</P>
|
||||
<P>
|
||||
and the next release in
|
||||
</P>
|
||||
<P>
|
||||
<A HREF="../next-lib/src"><CODE>www.cs.chalmers.se/Cs/Research/Language-technology/GF/next-lib/src/</CODE></A>
|
||||
</P>
|
||||
<P>
|
||||
It is recommended to build new grammars to match the next release.
|
||||
</P>
|
||||
<A NAME="toc1"></A>
|
||||
<H2>The resource grammar structure</H2>
|
||||
<P>
|
||||
The library is divided into a bunch of modules, whose dependencies
|
||||
are given in the following figure.
|
||||
</P>
|
||||
<P>
|
||||
<IMG ALIGN="left" SRC="Syntax.png" BORDER="0" ALT="">
|
||||
</P>
|
||||
<P>
|
||||
Modules of different kinds are distinguished as follows:
|
||||
</P>
|
||||
<UL>
|
||||
<LI>solid contours: module seen by end users
|
||||
<LI>dashed contours: internal module
|
||||
<LI>ellipse: abstract/concrete pair of modules
|
||||
<LI>rectangle: resource or instance
|
||||
<LI>diamond: interface
|
||||
</UL>
|
||||
|
||||
<P>
|
||||
Put in another way:
|
||||
</P>
|
||||
<UL>
|
||||
<LI>solid rectangles and diamonds: user-accessible library API
|
||||
<LI>solid ellipses: user-accessible top-level grammar for parsing and linearization
|
||||
<LI>dashed contours: not visible to users
|
||||
</UL>
|
||||
|
||||
<P>
|
||||
The dashed ellipses form the main parts of the implementation, on which the resource
|
||||
grammar programmer has to work with. She also has to work on the <CODE>Paradigms</CODE>
|
||||
module. The rest of the modules can be produced mechanically from corresponding
|
||||
modules for other languages, by just changing the language codes appearing in
|
||||
their module headers.
|
||||
</P>
|
||||
<P>
|
||||
The module structure is rather flat: most modules are direct
|
||||
parents of <CODE>Grammar</CODE>. The idea
|
||||
is that the implementors can concentrate on one linguistic aspect at a time, or
|
||||
also distribute the work among several authors. The module <CODE>Cat</CODE>
|
||||
defines the "glue" that ties the aspects together - a type system
|
||||
to which all the other modules conform, so that e.g. <CODE>NP</CODE> means
|
||||
the same thing in those modules that use <CODE>NP</CODE>s and those that
|
||||
constructs them.
|
||||
</P>
|
||||
<A NAME="toc2"></A>
|
||||
<H3>Library API modules</H3>
|
||||
<P>
|
||||
For the user of the library, these modules are the most important ones.
|
||||
In a typical application, it is enough to open <CODE>Paradigms</CODE> and <CODE>Syntax</CODE>.
|
||||
The module <CODE>Try</CODE> combines these two, making it possible to experiment
|
||||
with combinations of syntactic and lexical constructors by using the
|
||||
<CODE>cc</CODE> command in the GF shell. Here are short explanations of each API module:
|
||||
</P>
|
||||
<UL>
|
||||
<LI><CODE>Try</CODE>: the whole resource library for a language (<CODE>Paradigms</CODE>, <CODE>Syntax</CODE>,
|
||||
<CODE>Irreg</CODE>, and <CODE>Extra</CODE>);
|
||||
produced mechanically as a collection of modules
|
||||
<LI><CODE>Syntax</CODE>: language-independent categories, syntax functions, and structural words;
|
||||
produced mechanically as a collection of modules
|
||||
<LI><CODE>Constructors</CODE>: language-independent syntax functions and structural words;
|
||||
produced mechanically via functor instantiation
|
||||
<LI><CODE>Paradigms</CODE>: language-dependent morphological paradigms
|
||||
</UL>
|
||||
|
||||
<A NAME="toc3"></A>
|
||||
<H3>Phrase category modules</H3>
|
||||
<P>
|
||||
The immediate parents of <CODE>Grammar</CODE> will be called <B>phrase category modules</B>,
|
||||
since each of them concentrates on a particular phrase category (nouns, verbs,
|
||||
adjectives, sentences,...). A phrase category module tells
|
||||
<I>how to construct phrases in that category</I>. You will find out that
|
||||
all functions in any of these modules have the same value type (or maybe
|
||||
one of a small number of different types). Thus we have
|
||||
</P>
|
||||
<UL>
|
||||
<LI><CODE>Noun</CODE>: construction of nouns and noun phrases
|
||||
<LI><CODE>Adjective</CODE>: construction of adjectival phrases
|
||||
<LI><CODE>Verb</CODE>: construction of verb phrases
|
||||
<LI><CODE>Adverb</CODE>: construction of adverbial phrases
|
||||
<LI><CODE>Numeral</CODE>: construction of cardinal and ordinal numerals
|
||||
<LI><CODE>Sentence</CODE>: construction of sentences and imperatives
|
||||
<LI><CODE>Question</CODE>: construction of questions
|
||||
<LI><CODE>Relative</CODE>: construction of relative clauses
|
||||
<LI><CODE>Conjunction</CODE>: coordination of phrases
|
||||
<LI><CODE>Phrase</CODE>: construction of the major units of text and speech
|
||||
<LI><CODE>Text</CODE>: construction of texts as sequences of phrases
|
||||
<LI><CODE>Idiom</CODE>: idiomatic expressions such as existentials
|
||||
</UL>
|
||||
|
||||
<A NAME="toc4"></A>
|
||||
<H3>Infrastructure modules</H3>
|
||||
<P>
|
||||
Expressions of each phrase category are constructed in the corresponding
|
||||
phrase category module. But their <I>use</I> takes mostly place in other modules.
|
||||
For instance, noun phrases, which are constructed in <CODE>Noun</CODE>, are
|
||||
used as arguments of functions of almost all other phrase category modules.
|
||||
How can we build all these modules independently of each other?
|
||||
</P>
|
||||
<P>
|
||||
As usual in typeful programming, the <I>only</I> thing you need to know
|
||||
about an object you use is its type. When writing a linearization rule
|
||||
for a GF abstract syntax function, the only thing you need to know is
|
||||
the linearization types of its value and argument categories. To achieve
|
||||
the division of the resource grammar to several parallel phrase category modules,
|
||||
what we need is an underlying definition of the linearization types. This
|
||||
definition is given as the implementation of
|
||||
</P>
|
||||
<UL>
|
||||
<LI><CODE>Cat</CODE>: syntactic categories of the resource grammar
|
||||
</UL>
|
||||
|
||||
<P>
|
||||
Any resource grammar implementation has first to agree on how to implement
|
||||
<CODE>Cat</CODE>. Luckily enough, even this can be done incrementally: you
|
||||
can skip the <CODE>lincat</CODE> definition of a category and use the default
|
||||
<CODE>{s : Str}</CODE> until you need to change it to something else. In
|
||||
English, for instance, many categories do have this linearization type.
|
||||
</P>
|
||||
<A NAME="toc5"></A>
|
||||
<H3>Lexical modules</H3>
|
||||
<P>
|
||||
What is lexical and what is syntactic is not as clearcut in GF as in
|
||||
some other grammar formalisms. Logically, lexical means atom, i.e. a
|
||||
<CODE>fun</CODE> with no arguments. Linguistically, one may add to this
|
||||
that the <CODE>lin</CODE> consists of only one token (or of a table whose values
|
||||
are single tokens). Even in the restricted lexicon included in the resource
|
||||
API, the latter rule is sometimes violated in some languages. For instance,
|
||||
<CODE>Structural.both7and_DConj</CODE> is an atom, but its linearization is
|
||||
two words e.g. <I>both - and</I>.
|
||||
</P>
|
||||
<P>
|
||||
Another characterization of lexical is that lexical units can be added
|
||||
almost <I>ad libitum</I>, and they cannot be defined in terms of already
|
||||
given rules. The lexical modules of the resource API are thus more like
|
||||
samples than complete lists. There are two such modules:
|
||||
</P>
|
||||
<UL>
|
||||
<LI><CODE>Structural</CODE>: structural words (determiners, conjunctions,...)
|
||||
<LI><CODE>Lexicon</CODE>: basic everyday content words (nouns, verbs,...)
|
||||
</UL>
|
||||
|
||||
<P>
|
||||
The module <CODE>Structural</CODE> aims for completeness, and is likely to
|
||||
be extended in future releases of the resource. The module <CODE>Lexicon</CODE>
|
||||
gives a "random" list of words, which enables testing the syntax.
|
||||
It also provides a check list for morphology, since those words are likely to include
|
||||
most morphological patterns of the language.
|
||||
</P>
|
||||
<P>
|
||||
In the case of <CODE>Lexicon</CODE> it may come out clearer than anywhere else
|
||||
in the API that it is impossible to give exact translation equivalents in
|
||||
different languages on the level of a resource grammar. This is no problem,
|
||||
since application grammars can use the resource in different ways for
|
||||
different languages.
|
||||
</P>
|
||||
<A NAME="toc6"></A>
|
||||
<H2>Language-dependent syntax modules</H2>
|
||||
<P>
|
||||
In addition to the common API, there is room for language-dependent extensions
|
||||
of the resource. The top level of each languages looks as follows (with German
|
||||
as example):
|
||||
</P>
|
||||
<PRE>
|
||||
abstract AllGerAbs = Lang, ExtraGerAbs, IrregGerAbs
|
||||
</PRE>
|
||||
<P>
|
||||
where <CODE>ExtraGerAbs</CODE> is a collection of syntactic structures specific to German,
|
||||
and <CODE>IrregGerAbs</CODE> is a dictionary of irregular words of German
|
||||
(at the moment, just verbs). Each of these language-specific grammars has
|
||||
the potential to grow into a full-scale grammar of the language. These grammar
|
||||
can also be used as libraries, but the possibility of using functors is lost.
|
||||
</P>
|
||||
<P>
|
||||
To give a better overview of language-specific structures,
|
||||
modules like <CODE>ExtraGerAbs</CODE>
|
||||
are built from a language-independent module <CODE>ExtraAbs</CODE>
|
||||
by restricted inheritance:
|
||||
</P>
|
||||
<PRE>
|
||||
abstract ExtraGerAbs = Extra [f,g,...]
|
||||
</PRE>
|
||||
<P>
|
||||
Thus any category and function in <CODE>Extra</CODE> may be shared by a subset of all
|
||||
languages. One can see this set-up as a matrix, which tells
|
||||
what <CODE>Extra</CODE> structures
|
||||
are implemented in what languages. For the common API in <CODE>Grammar</CODE>, the matrix
|
||||
is filled with 1's (everything is implemented in every language).
|
||||
</P>
|
||||
<P>
|
||||
In a minimal resource grammar implementation, the language-dependent
|
||||
extensions are just empty modules, but it is good to provide them for
|
||||
the sake of uniformity.
|
||||
</P>
|
||||
<A NAME="toc7"></A>
|
||||
<H3>The present-tense fragment</H3>
|
||||
<P>
|
||||
Some lines in the resource library are suffixed with the comment
|
||||
</P>
|
||||
<PRE>
|
||||
--# notpresent
|
||||
</PRE>
|
||||
<P>
|
||||
which is used by a preprocessor to exclude those lines from
|
||||
a reduced version of the full resource. This present-tense-only
|
||||
version is useful for applications in most technical text, since
|
||||
they reduce the grammar size and compilation time. It can also
|
||||
be useful to exclude those lines in a first version of resource
|
||||
implementation. To compile a grammar with present-tense-only, use
|
||||
</P>
|
||||
<PRE>
|
||||
make Present
|
||||
</PRE>
|
||||
<P>
|
||||
with <CODE>resource/Makefile</CODE>.
|
||||
</P>
|
||||
<A NAME="toc8"></A>
|
||||
<H2>Phases of the work</H2>
|
||||
<A NAME="toc9"></A>
|
||||
<H3>Putting up a directory</H3>
|
||||
<P>
|
||||
Unless you are writing an instance of a parametrized implementation
|
||||
(Romance or Scandinavian), which will be covered later, the
|
||||
simplest way is to follow roughly the following procedure. Assume you
|
||||
are building a grammar for the German language. Here are the first steps,
|
||||
which we actually followed ourselves when building the German implementation
|
||||
of resource v. 1.0 at Ubuntu linux. We have slightly modified them to
|
||||
match resource v. 1.5 and GF v. 3.0.
|
||||
</P>
|
||||
<OL>
|
||||
<LI>Create a sister directory for <CODE>GF/lib/resource/english</CODE>, named
|
||||
<CODE>german</CODE>.
|
||||
<PRE>
|
||||
cd GF/lib/resource/
|
||||
mkdir german
|
||||
cd german
|
||||
</PRE>
|
||||
<P></P>
|
||||
<LI>Check out the [ISO 639 3-letter language code
|
||||
<A HREF="http://www.w3.org/WAI/ER/IG/ert/iso639.htm">http://www.w3.org/WAI/ER/IG/ert/iso639.htm</A>]
|
||||
for German: both <CODE>Ger</CODE> and <CODE>Deu</CODE> are given, and we pick <CODE>Ger</CODE>.
|
||||
(We use the 3-letter codes rather than the more common 2-letter codes,
|
||||
since they will suffice for many more languages!)
|
||||
<P></P>
|
||||
<LI>Copy the <CODE>*Eng.gf</CODE> files from <CODE>english</CODE> <CODE>german</CODE>,
|
||||
and rename them:
|
||||
<PRE>
|
||||
cp ../english/*Eng.gf .
|
||||
rename 's/Eng/Ger/' *Eng.gf
|
||||
</PRE>
|
||||
If you don't have the <CODE>rename</CODE> command, you can use a bash script with <CODE>mv</CODE>.
|
||||
</OL>
|
||||
|
||||
<OL>
|
||||
<LI>Change the <CODE>Eng</CODE> module references to <CODE>Ger</CODE> references
|
||||
in all files:
|
||||
<PRE>
|
||||
sed -i 's/English/German/g' *Ger.gf
|
||||
sed -i 's/Eng/Ger/g' *Ger.gf
|
||||
</PRE>
|
||||
The first line prevents changing the word <CODE>English</CODE>, which appears
|
||||
here and there in comments, to <CODE>Gerlish</CODE>. The <CODE>sed</CODE> command syntax
|
||||
may vary depending on your operating system.
|
||||
<P></P>
|
||||
<LI>This may of course change unwanted occurrences of the
|
||||
string <CODE>Eng</CODE> - verify this by
|
||||
<PRE>
|
||||
grep Ger *.gf
|
||||
</PRE>
|
||||
But you will have to make lots of manual changes in all files anyway!
|
||||
<P></P>
|
||||
<LI>Comment out the contents of these files:
|
||||
<PRE>
|
||||
sed -i 's/^/--/' *Ger.gf
|
||||
</PRE>
|
||||
This will give you a set of templates out of which the grammar
|
||||
will grow as you uncomment and modify the files rule by rule.
|
||||
<P></P>
|
||||
<LI>In all <CODE>.gf</CODE> files, uncomment the module headers and brackets,
|
||||
leaving the module bodies commented. Unfortunately, there is no
|
||||
simple way to do this automatically (or to avoid commenting these
|
||||
lines in the previous step) - but uncommenting the first
|
||||
and the last lines will actually do the job for many of the files.
|
||||
<P></P>
|
||||
<LI>Uncomment the contents of the main grammar file:
|
||||
<PRE>
|
||||
sed -i 's/^--//' LangGer.gf
|
||||
</PRE>
|
||||
<P></P>
|
||||
<LI>Now you can open the grammar <CODE>LangGer</CODE> in GF:
|
||||
<PRE>
|
||||
gf LangGer.gf
|
||||
</PRE>
|
||||
You will get lots of warnings on missing rules, but the grammar will compile.
|
||||
<P></P>
|
||||
<LI>At all the following steps you will now have a valid, but incomplete
|
||||
GF grammar. The GF command
|
||||
<PRE>
|
||||
pg -missing
|
||||
</PRE>
|
||||
tells you what exactly is missing.
|
||||
</OL>
|
||||
|
||||
<P>
|
||||
Here is the module structure of <CODE>LangGer</CODE>. It has been simplified by leaving out
|
||||
the majority of the phrase category modules. Each of them has the same dependencies
|
||||
as <CODE>VerbGer</CODE>, whose complete dependencies are shown as an example.
|
||||
</P>
|
||||
<P>
|
||||
<IMG ALIGN="middle" SRC="German.png" BORDER="0" ALT="">
|
||||
</P>
|
||||
<A NAME="toc10"></A>
|
||||
<H3>Direction of work</H3>
|
||||
<P>
|
||||
The real work starts now. There are many ways to proceed, the most obvious ones being
|
||||
</P>
|
||||
<UL>
|
||||
<LI>Top-down: start from the module <CODE>Phrase</CODE> and go down to <CODE>Sentence</CODE>, then
|
||||
<CODE>Verb</CODE>, <CODE>Noun</CODE>, and in the end <CODE>Lexicon</CODE>. In this way, you are all the time
|
||||
building complete phrases, and add them with more content as you proceed.
|
||||
<B>This approach is not recommended</B>. It is impossible to test the rules if
|
||||
you have no words to apply the constructions to.
|
||||
<P></P>
|
||||
<LI>Bottom-up: set as your first goal to implement <CODE>Lexicon</CODE>. To this end, you
|
||||
need to write <CODE>ParadigmsGer</CODE>, which in turn needs parts of
|
||||
<CODE>MorphoGer</CODE> and <CODE>ResGer</CODE>.
|
||||
<B>This approach is not recommended</B>. You can get stuck to details of
|
||||
morphology such as irregular words, and you don't have enough grasp about
|
||||
the type system to decide what forms to cover in morphology.
|
||||
</UL>
|
||||
|
||||
<P>
|
||||
The practical working direction is thus a saw-like motion between the morphological
|
||||
and top-level modules. Here is a possible course of the work that gives enough
|
||||
test data and enough general view at any point:
|
||||
</P>
|
||||
<OL>
|
||||
<LI>Define <CODE>Cat.N</CODE> and the required parameter types in <CODE>ResGer</CODE>. As we define
|
||||
<PRE>
|
||||
lincat N = {s : Number => Case => Str ; g : Gender} ;
|
||||
</PRE>
|
||||
we need the parameter types <CODE>Number</CODE>, <CODE>Case</CODE>, and <CODE>Gender</CODE>. The definition
|
||||
of <CODE>Number</CODE> in <A HREF="../lib/resource/common/ParamX.gf"><CODE>common/ParamX</CODE></A>
|
||||
works for German, so we
|
||||
use it and just define <CODE>Case</CODE> and <CODE>Gender</CODE> in <CODE>ResGer</CODE>.
|
||||
<P></P>
|
||||
<LI>Define some cases of <CODE>mkN</CODE> in <CODE>ParadigmsGer</CODE>. In this way you can
|
||||
already implement a huge amount of nouns correctly in <CODE>LexiconGer</CODE>. Actually
|
||||
just adding the worst-case instance of <CODE>mkN</CODE> (the one taking the most
|
||||
arguments) should suffice for every noun - but,
|
||||
since it is tedious to use, you
|
||||
might proceed to the next step before returning to morphology and defining the
|
||||
real work horse, <CODE>mkN</CODE> taking two forms and a gender.
|
||||
<P></P>
|
||||
<LI>While doing this, you may want to test the resource independently. Do this by
|
||||
starting the GF shell in the <CODE>resource</CODE> directory, by the commands
|
||||
<PRE>
|
||||
> i -retain german/ParadigmsGer
|
||||
> cc -table mkN "Kirche"
|
||||
</PRE>
|
||||
<P></P>
|
||||
<LI>Proceed to determiners and pronouns in
|
||||
<CODE>NounGer</CODE> (<CODE>DetCN UsePron DetQuant NumSg DefArt IndefArt UseN</CODE>) and
|
||||
<CODE>StructuralGer</CODE> (<CODE>i_Pron this_Quant</CODE>). You also need some categories and
|
||||
parameter types. At this point, it is maybe not possible to find out the final
|
||||
linearization types of <CODE>CN</CODE>, <CODE>NP</CODE>, <CODE>Det</CODE>, and <CODE>Quant</CODE>, but at least you should
|
||||
be able to correctly inflect noun phrases such as <I>every airplane</I>:
|
||||
<PRE>
|
||||
> i german/LangGer.gf
|
||||
> l -table DetCN every_Det (UseN airplane_N)
|
||||
|
||||
Nom: jeder Flugzeug
|
||||
Acc: jeden Flugzeug
|
||||
Dat: jedem Flugzeug
|
||||
Gen: jedes Flugzeugs
|
||||
</PRE>
|
||||
<P></P>
|
||||
<LI>Proceed to verbs: define <CODE>CatGer.V</CODE>, <CODE>ResGer.VForm</CODE>, and
|
||||
<CODE>ParadigmsGer.mkV</CODE>. You may choose to exclude <CODE>notpresent</CODE>
|
||||
cases at this point. But anyway, you will be able to inflect a good
|
||||
number of verbs in <CODE>Lexicon</CODE>, such as
|
||||
<CODE>live_V</CODE> (<CODE>mkV "leben"</CODE>).
|
||||
<P></P>
|
||||
<LI>Now you can soon form your first sentences: define <CODE>VP</CODE> and
|
||||
<CODE>Cl</CODE> in <CODE>CatGer</CODE>, <CODE>VerbGer.UseV</CODE>, and <CODE>SentenceGer.PredVP</CODE>.
|
||||
Even if you have excluded the tenses, you will be able to produce
|
||||
<PRE>
|
||||
> i -preproc=./mkPresent german/LangGer.gf
|
||||
> l -table PredVP (UsePron i_Pron) (UseV live_V)
|
||||
|
||||
Pres Simul Pos Main: ich lebe
|
||||
Pres Simul Pos Inv: lebe ich
|
||||
Pres Simul Pos Sub: ich lebe
|
||||
Pres Simul Neg Main: ich lebe nicht
|
||||
Pres Simul Neg Inv: lebe ich nicht
|
||||
Pres Simul Neg Sub: ich nicht lebe
|
||||
</PRE>
|
||||
You should also be able to parse:
|
||||
<PRE>
|
||||
> p -cat=Cl "ich lebe"
|
||||
PredVP (UsePron i_Pron) (UseV live_V)
|
||||
</PRE>
|
||||
<P></P>
|
||||
<LI>Transitive verbs
|
||||
(<CODE>CatGer.V2 CatGer.VPSlash ParadigmsGer.mkV2 VerbGer.ComplSlash VerbGer.SlashV2a</CODE>)
|
||||
are a natural next step, so that you can
|
||||
produce <CODE>ich liebe dich</CODE> ("I love you").
|
||||
<P></P>
|
||||
<LI>Adjectives (<CODE>CatGer.A ParadigmsGer.mkA NounGer.AdjCN AdjectiveGer.PositA</CODE>)
|
||||
will force you to think about strong and weak declensions, so that you can
|
||||
correctly inflect <I>mein neuer Wagen, dieser neue Wagen</I>
|
||||
("my new car, this new car").
|
||||
<P></P>
|
||||
<LI>Once you have implemented the set
|
||||
(``Noun.DetCN Noun.AdjCN Verb.UseV Verb.ComplSlash Verb.SlashV2a Sentence.PredVP),
|
||||
you have overcome most of difficulties. You know roughly what parameters
|
||||
and dependences there are in your language, and you can now proceed very
|
||||
much in the order you please.
|
||||
</OL>
|
||||
|
||||
<A NAME="toc11"></A>
|
||||
<H3>The develop-test cycle</H3>
|
||||
<P>
|
||||
The following develop-test cycle will
|
||||
be applied most of the time, both in the first steps described above
|
||||
and in later steps where you are more on your own.
|
||||
</P>
|
||||
<OL>
|
||||
<LI>Select a phrase category module, e.g. <CODE>NounGer</CODE>, and uncomment some
|
||||
linearization rules (for instance, <CODE>DetCN</CODE>, as above).
|
||||
<P></P>
|
||||
<LI>Write down some German examples of this rule, for instance translations
|
||||
of "the dog", "the house", "the big house", etc. Write these in all their
|
||||
different forms (two numbers and four cases).
|
||||
<P></P>
|
||||
<LI>Think about the categories involved (<CODE>CN, NP, N, Det</CODE>) and the
|
||||
variations they have. Encode this in the lincats of <CODE>CatGer</CODE>.
|
||||
You may have to define some new parameter types in <CODE>ResGer</CODE>.
|
||||
<P></P>
|
||||
<LI>To be able to test the construction,
|
||||
define some words you need to instantiate it
|
||||
in <CODE>LexiconGer</CODE>. You will also need some regular inflection patterns
|
||||
in<CODE>ParadigmsGer</CODE>.
|
||||
<P></P>
|
||||
<LI>Test by parsing, linearization,
|
||||
and random generation. In particular, linearization to a table should
|
||||
be used so that you see all forms produced; the <CODE>treebank</CODE> option
|
||||
preserves the tree
|
||||
<PRE>
|
||||
> gr -cat=NP -number=20 | l -table -treebank
|
||||
</PRE>
|
||||
<P></P>
|
||||
<LI>Save some tree-linearization pairs for later regression testing. You can save
|
||||
a gold standard treebank and use the Unix <CODE>diff</CODE> command to compare later
|
||||
linearizations produced from the same list of trees. If you save the trees
|
||||
in a file <CODE>trees</CODE>, you can do as follows:
|
||||
<PRE>
|
||||
> rf -file=trees -tree -lines | l -table -treebank | wf -file=treebank
|
||||
</PRE>
|
||||
<P></P>
|
||||
<LI>A file with trees testing all resource functions is included in the resource,
|
||||
entitled <CODE>resource/exx-resource.gft</CODE>. A treebank can be created from this by
|
||||
the Unix command
|
||||
<PRE>
|
||||
% runghc Make.hs test langs=Ger
|
||||
</PRE>
|
||||
</OL>
|
||||
|
||||
<P>
|
||||
You are likely to run this cycle a few times for each linearization rule
|
||||
you implement, and some hundreds of times altogether. There are roughly
|
||||
70 <CODE>cat</CODE>s and
|
||||
600 <CODE>funs</CODE> in <CODE>Lang</CODE> at the moment; 170 of the <CODE>funs</CODE> are outside the two
|
||||
lexicon modules).
|
||||
</P>
|
||||
<A NAME="toc12"></A>
|
||||
<H3>Auxiliary modules</H3>
|
||||
<P>
|
||||
These auxuliary <CODE>resource</CODE> modules will be written by you.
|
||||
</P>
|
||||
<UL>
|
||||
<LI><CODE>ResGer</CODE>: parameter types and auxiliary operations
|
||||
(a resource for the resource grammar!)
|
||||
<LI><CODE>ParadigmsGer</CODE>: complete inflection engine and most important regular paradigms
|
||||
<LI><CODE>MorphoGer</CODE>: auxiliaries for <CODE>ParadigmsGer</CODE> and <CODE>StructuralGer</CODE>. This need
|
||||
not be separate from <CODE>ResGer</CODE>.
|
||||
</UL>
|
||||
|
||||
<P>
|
||||
These modules are language-independent and provided by the existing resource
|
||||
package.
|
||||
</P>
|
||||
<UL>
|
||||
<LI><CODE>ParamX</CODE>: parameter types used in many languages
|
||||
<LI><CODE>CommonX</CODE>: implementation of language-uniform categories
|
||||
such as $Text$ and $Phr$, as well as of
|
||||
the logical tense, anteriority, and polarity parameters
|
||||
<LI><CODE>Coordination</CODE>: operations to deal with lists and coordination
|
||||
<LI><CODE>Prelude</CODE>: general-purpose operations on strings, records,
|
||||
truth values, etc.
|
||||
<LI><CODE>Predef</CODE>: general-purpose operations with hard-coded definitions
|
||||
</UL>
|
||||
|
||||
<P>
|
||||
An important decision is what rules to implement in terms of operations in
|
||||
<CODE>ResGer</CODE>. The <B>golden rule of functional programming</B> says:
|
||||
</P>
|
||||
<UL>
|
||||
<LI><I>Whenever you find yourself programming by copy and paste, write a function instead!</I>.
|
||||
</UL>
|
||||
|
||||
<P>
|
||||
This rule suggests that an operation should be created if it is to be
|
||||
used at least twice. At the same time, a sound principle of <B>vicinity</B> says:
|
||||
</P>
|
||||
<UL>
|
||||
<LI><I>It should not require too much browsing to understand what a piece of code does.</I>
|
||||
</UL>
|
||||
|
||||
<P>
|
||||
From these two principles, we have derived the following practice:
|
||||
</P>
|
||||
<UL>
|
||||
<LI>If an operation is needed <I>in two different modules</I>,
|
||||
it should be created in as an <CODE>oper</CODE> in <CODE>ResGer</CODE>. An example is <CODE>mkClause</CODE>,
|
||||
used in <CODE>Sentence</CODE>, <CODE>Question</CODE>, and <CODE>Relative</CODE>.
|
||||
<LI>If an operation is needed <I>twice in the same module</I>, but never
|
||||
outside, it should be created in the same module. Many examples are
|
||||
found in <CODE>Numerals</CODE>.
|
||||
<LI>If an operation is needed <I>twice in the same judgement</I>, but never
|
||||
outside, it should be created by a <CODE>let</CODE> definition.
|
||||
<LI>If an operation is only needed once, it should not be created as an <CODE>oper</CODE>,
|
||||
but rather inlined. However, a <CODE>let</CODE> definition may well be in place just
|
||||
to make the code readable.
|
||||
Most functions in phrase category modules
|
||||
are implemented in this way.
|
||||
</UL>
|
||||
|
||||
<P>
|
||||
This discipline is very different from the one followed in early
|
||||
versions of the library (up to 0.9). We then valued the principle of
|
||||
abstraction more than vicinity, creating layers of abstraction for
|
||||
almost everything. This led in practice to the duplication of almost
|
||||
all code on the <CODE>lin</CODE> and <CODE>oper</CODE> levels, and made the code
|
||||
hard to understand and maintain.
|
||||
</P>
|
||||
<A NAME="toc13"></A>
|
||||
<H3>Morphology and lexicon</H3>
|
||||
<P>
|
||||
The paradigms needed to implement
|
||||
<CODE>LexiconGer</CODE> are defined in
|
||||
<CODE>ParadigmsGer</CODE>.
|
||||
This module provides high-level ways to define the linearization of
|
||||
lexical items, of categories <CODE>N, A, V</CODE> and their complement-taking
|
||||
variants.
|
||||
</P>
|
||||
<P>
|
||||
For ease of use, the <CODE>Paradigms</CODE> modules follow a certain
|
||||
naming convention. Thus they provide, for each lexical category, such as <CODE>N</CODE>,
|
||||
the overloaded functions, such as <CODE>mkN</CODE>, with the following cases:
|
||||
</P>
|
||||
<UL>
|
||||
<LI>the worst-case construction of <CODE>N</CODE>. Its type signature
|
||||
has the form
|
||||
<PRE>
|
||||
mkN : Str -> ... -> Str -> P -> ... -> Q -> N
|
||||
</PRE>
|
||||
with as many string and parameter arguments as can ever be needed to
|
||||
construct an <CODE>N</CODE>.
|
||||
<LI>the most regular cases, with just one string argument:
|
||||
<PRE>
|
||||
mkN : Str -> N
|
||||
</PRE>
|
||||
<LI>A language-dependent (small) set of functions to handle mild irregularities
|
||||
and common exceptions.
|
||||
</UL>
|
||||
|
||||
<P>
|
||||
For the complement-taking variants, such as <CODE>V2</CODE>, we provide
|
||||
</P>
|
||||
<UL>
|
||||
<LI>a case that takes a <CODE>V</CODE> and all necessary arguments, such
|
||||
as case and preposition:
|
||||
<PRE>
|
||||
mkV2 : V -> Case -> Str -> V2 ;
|
||||
</PRE>
|
||||
<LI>a case that takes a <CODE>Str</CODE> and produces a transitive verb with the direct
|
||||
object case:
|
||||
<PRE>
|
||||
mkV2 : Str -> V2 ;
|
||||
</PRE>
|
||||
<LI>A language-dependent (small) set of functions to handle common special cases,
|
||||
such as transitive verbs that are not regular:
|
||||
<PRE>
|
||||
mkV2 : V -> V2 ;
|
||||
</PRE>
|
||||
</UL>
|
||||
|
||||
<P>
|
||||
The golden rule for the design of paradigms is that
|
||||
</P>
|
||||
<UL>
|
||||
<LI><I>The user of the library will only need function applications with constants and strings, never any records or tables.</I>
|
||||
</UL>
|
||||
|
||||
<P>
|
||||
The discipline of data abstraction moreover requires that the user of the resource
|
||||
is not given access to parameter constructors, but only to constants that denote
|
||||
them. This gives the resource grammarian the freedom to change the underlying
|
||||
data representation if needed. It means that the <CODE>ParadigmsGer</CODE> module has
|
||||
to define constants for those parameter types and constructors that
|
||||
the application grammarian may need to use, e.g.
|
||||
</P>
|
||||
<PRE>
|
||||
oper
|
||||
Case : Type ;
|
||||
nominative, accusative, genitive, dative : Case ;
|
||||
</PRE>
|
||||
<P>
|
||||
These constants are defined in terms of parameter types and constructors
|
||||
in <CODE>ResGer</CODE> and <CODE>MorphoGer</CODE>, which modules are not
|
||||
visible to the application grammarian.
|
||||
</P>
|
||||
<A NAME="toc14"></A>
|
||||
<H3>Lock fields</H3>
|
||||
<P>
|
||||
An important difference between <CODE>MorphoGer</CODE> and
|
||||
<CODE>ParadigmsGer</CODE> is that the former uses "raw" record types
|
||||
for word classes, whereas the latter uses category symbols defined in
|
||||
<CODE>CatGer</CODE>. When these category symbols are used to denote
|
||||
record types in a resource module, such as <CODE>ParadigmsGer</CODE>,
|
||||
a <B>lock field</B> is added to the record, so that categories
|
||||
with the same implementation are not confused with each other.
|
||||
(This is inspired by the <CODE>newtype</CODE> discipline in Haskell.)
|
||||
For instance, the lincats of adverbs and conjunctions are the same
|
||||
in <CODE>CommonX</CODE> (and therefore in <CODE>CatGer</CODE>, which inherits it):
|
||||
</P>
|
||||
<PRE>
|
||||
lincat Adv = {s : Str} ;
|
||||
lincat Conj = {s : Str} ;
|
||||
</PRE>
|
||||
<P>
|
||||
But when these category symbols are used to denote their linearization
|
||||
types in a resource module, these definitions are translated to
|
||||
</P>
|
||||
<PRE>
|
||||
oper Adv : Type = {s : Str ; lock_Adv : {}} ;
|
||||
oper Conj : Type = {s : Str ; lock_Conj : {}} ;
|
||||
</PRE>
|
||||
<P>
|
||||
In this way, the user of a resource grammar cannot confuse adverbs with
|
||||
conjunctions. In other words, the lock fields force the type checker
|
||||
to function as grammaticality checker.
|
||||
</P>
|
||||
<P>
|
||||
When the resource grammar is <CODE>open</CODE>ed in an application grammar, the
|
||||
lock fields are never seen (except possibly in type error messages),
|
||||
and the application grammarian should never write them herself. If she
|
||||
has to do this, it is a sign that the resource grammar is incomplete, and
|
||||
the proper way to proceed is to fix the resource grammar.
|
||||
</P>
|
||||
<P>
|
||||
The resource grammarian has to provide the dummy lock field values
|
||||
in her hidden definitions of constants in <CODE>Paradigms</CODE>. For instance,
|
||||
</P>
|
||||
<PRE>
|
||||
mkAdv : Str -> Adv ;
|
||||
-- mkAdv s = {s = s ; lock_Adv = <>} ;
|
||||
</PRE>
|
||||
<P></P>
|
||||
<A NAME="toc15"></A>
|
||||
<H3>Lexicon construction</H3>
|
||||
<P>
|
||||
The lexicon belonging to <CODE>LangGer</CODE> consists of two modules:
|
||||
</P>
|
||||
<UL>
|
||||
<LI><CODE>StructuralGer</CODE>, structural words, built by using both
|
||||
<CODE>ParadigmsGer</CODE> and <CODE>MorphoGer</CODE>.
|
||||
<LI><CODE>LexiconGer</CODE>, content words, built by using <CODE>ParadigmsGer</CODE> only.
|
||||
</UL>
|
||||
|
||||
<P>
|
||||
The reason why <CODE>MorphoGer</CODE> has to be used in <CODE>StructuralGer</CODE>
|
||||
is that <CODE>ParadigmsGer</CODE> does not contain constructors for closed
|
||||
word classes such as pronouns and determiners. The reason why we
|
||||
recommend <CODE>ParadigmsGer</CODE> for building <CODE>LexiconGer</CODE> is that
|
||||
the coverage of the paradigms gets thereby tested and that the
|
||||
use of the paradigms in <CODE>LexiconGer</CODE> gives a good set of examples for
|
||||
those who want to build new lexica.
|
||||
</P>
|
||||
<A NAME="toc16"></A>
|
||||
<H2>Lexicon extension</H2>
|
||||
<A NAME="toc17"></A>
|
||||
<H3>The irregularity lexicon</H3>
|
||||
<P>
|
||||
It is useful in most languages to provide a separate module of irregular
|
||||
verbs and other words which are difficult for a lexicographer
|
||||
to handle. There are usually a limited number of such words - a
|
||||
few hundred perhaps. Building such a lexicon separately also
|
||||
makes it less important to cover <I>everything</I> by the
|
||||
worst-case variants of the paradigms <CODE>mkV</CODE> etc.
|
||||
</P>
|
||||
<A NAME="toc18"></A>
|
||||
<H3>Lexicon extraction from a word list</H3>
|
||||
<P>
|
||||
You can often find resources such as lists of
|
||||
irregular verbs on the internet. For instance, the
|
||||
Irregular German Verb page
|
||||
previously found in
|
||||
<CODE>http://www.iee.et.tu-dresden.de/~wernerr/grammar/verben_dt.html</CODE>
|
||||
gives a list of verbs in the
|
||||
traditional tabular format, which begins as follows:
|
||||
</P>
|
||||
<PRE>
|
||||
backen (du bäckst, er bäckt) backte [buk] gebacken
|
||||
befehlen (du befiehlst, er befiehlt; befiehl!) befahl (beföhle; befähle) befohlen
|
||||
beginnen begann (begönne; begänne) begonnen
|
||||
beißen biß gebissen
|
||||
</PRE>
|
||||
<P>
|
||||
All you have to do is to write a suitable verb paradigm
|
||||
</P>
|
||||
<PRE>
|
||||
irregV : (x1,_,_,_,_,x6 : Str) -> V ;
|
||||
</PRE>
|
||||
<P>
|
||||
and a Perl or Python or Haskell script that transforms
|
||||
the table to
|
||||
</P>
|
||||
<PRE>
|
||||
backen_V = irregV "backen" "bäckt" "back" "backte" "backte" "gebacken" ;
|
||||
befehlen_V = irregV "befehlen" "befiehlt" "befiehl" "befahl" "beföhle" "befohlen" ;
|
||||
</PRE>
|
||||
<P></P>
|
||||
<P>
|
||||
When using ready-made word lists, you should think about
|
||||
copyright issues. All resource grammar material should
|
||||
be provided under GNU Lesser General Public License (LGPL).
|
||||
</P>
|
||||
<A NAME="toc19"></A>
|
||||
<H3>Lexicon extraction from raw text data</H3>
|
||||
<P>
|
||||
This is a cheap technique to build a lexicon of thousands
|
||||
of words, if text data is available in digital format.
|
||||
See the <A HREF="http://www.cs.chalmers.se/~markus/extract/">Extract Homepage</A>
|
||||
homepage for details.
|
||||
</P>
|
||||
<A NAME="toc20"></A>
|
||||
<H3>Bootstrapping with smart paradigms</H3>
|
||||
<P>
|
||||
This is another cheap technique, where you need as input a list of words with
|
||||
part-of-speech marking. You initialize the lexicon by using the one-argument
|
||||
<CODE>mkN</CODE> etc paradigms, and add forms to those words that do not come out right.
|
||||
This procedure is described in the paper
|
||||
</P>
|
||||
<P>
|
||||
A. Ranta.
|
||||
How predictable is Finnish morphology? An experiment on lexicon construction.
|
||||
In J. Nivre, M. Dahllöf and B. Megyesi (eds),
|
||||
<I>Resourceful Language Technology: Festschrift in Honor of Anna Sågvall Hein</I>,
|
||||
University of Uppsala,
|
||||
2008.
|
||||
Available from the <A HREF="http://publications.uu.se/abstract.xsql?dbid=8933">series homepage</A>
|
||||
</P>
|
||||
<A NAME="toc21"></A>
|
||||
<H2>Extending the resource grammar API</H2>
|
||||
<P>
|
||||
Sooner or later it will happen that the resource grammar API
|
||||
does not suffice for all applications. A common reason is
|
||||
that it does not include idiomatic expressions in a given language.
|
||||
The solution then is in the first place to build language-specific
|
||||
extension modules, like <CODE>ExtraGer</CODE>.
|
||||
</P>
|
||||
<A NAME="toc22"></A>
|
||||
<H2>Using parametrized modules</H2>
|
||||
<A NAME="toc23"></A>
|
||||
<H3>Writing an instance of parametrized resource grammar implementation</H3>
|
||||
<P>
|
||||
Above we have looked at how a resource implementation is built by
|
||||
the copy and paste method (from English to German), that is, formally
|
||||
speaking, from scratch. A more elegant solution available for
|
||||
families of languages such as Romance and Scandinavian is to
|
||||
use parametrized modules. The advantages are
|
||||
</P>
|
||||
<UL>
|
||||
<LI>theoretical: linguistic generalizations and insights
|
||||
<LI>practical: maintainability improves with fewer components
|
||||
</UL>
|
||||
|
||||
<P>
|
||||
Here is a set of
|
||||
<A HREF="http://www.cs.chalmers.se/~aarne/geocal2006.pdf">slides</A>
|
||||
on the topic.
|
||||
</P>
|
||||
<A NAME="toc24"></A>
|
||||
<H3>Parametrizing a resource grammar implementation</H3>
|
||||
<P>
|
||||
This is the most demanding form of resource grammar writing.
|
||||
We do <I>not</I> recommend the method of parametrizing from the
|
||||
beginning: it is easier to have one language first implemented
|
||||
in the conventional way and then add another language of the
|
||||
same family by parametrization. This means that the copy and
|
||||
paste method is still used, but at this time the differences
|
||||
are put into an <CODE>interface</CODE> module.
|
||||
</P>
|
||||
<A NAME="toc25"></A>
|
||||
<H2>Character encoding and transliterations</H2>
|
||||
<P>
|
||||
This section is relevant for languages using a non-ASCII character set.
|
||||
</P>
|
||||
<A NAME="toc26"></A>
|
||||
<H2>Coding conventions in GF</H2>
|
||||
<P>
|
||||
From version 3.0, GF follows a simple encoding convention:
|
||||
</P>
|
||||
<UL>
|
||||
<LI>GF source files may follow any encoding, such as isolatin-1 or UTF-8;
|
||||
the default is isolatin-1, and UTF8 must be indicated by the judgement
|
||||
<PRE>
|
||||
flags coding = utf8 ;
|
||||
</PRE>
|
||||
in each source module.
|
||||
<LI>for internal processing, all characters are converted to 16-bit unicode,
|
||||
as the first step of grammar compilation guided by the <CODE>coding</CODE> flag
|
||||
<LI>as the last step of compilation, all characters are converted to UTF-8
|
||||
<LI>thus, GF object files (<CODE>gfo</CODE>) and the Portable Grammar Format (<CODE>pgf</CODE>)
|
||||
are in UTF-8
|
||||
</UL>
|
||||
|
||||
<P>
|
||||
Most current resource grammars use isolatin-1 in the source, but this does
|
||||
not affect their use in parallel with grammars written in other encodings.
|
||||
In fact, a grammar can be put up from modules using different codings.
|
||||
</P>
|
||||
<P>
|
||||
<B>Warning</B>. While string literals may contain any characters, identifiers
|
||||
must be isolatin-1 letters (or digits, underscores, or dashes). This has to
|
||||
do with the restrictions of the lexer tool that is used.
|
||||
</P>
|
||||
<A NAME="toc27"></A>
|
||||
<H2>Transliterations</H2>
|
||||
<P>
|
||||
While UTF-8 is well supported by most web browsers, its use in terminals and
|
||||
text editors may cause disappointment. Many grammarians therefore prefer to
|
||||
use ASCII transliterations. GF 3.0beta2 provides the following built-in
|
||||
transliterations:
|
||||
</P>
|
||||
<UL>
|
||||
<LI>Arabic
|
||||
<LI>Devanagari (Hindi)
|
||||
<LI>Thai
|
||||
</UL>
|
||||
|
||||
<P>
|
||||
New transliterations can be defined in the GF source file
|
||||
<A HREF="../src/GF/Text/Transliterations.hs"><CODE>GF/Text/Transliterations.hs</CODE></A>.
|
||||
This file also gives instructions on how new ones are added.
|
||||
</P>
|
||||
|
||||
<!-- html code generated by txt2tags 2.4 (http://txt2tags.sf.net) -->
|
||||
<!-- cmdline: txt2tags -\-toc Resource-HOWTO.txt -->
|
||||
</BODY></HTML>
|
||||
@@ -1,827 +0,0 @@
|
||||
Resource grammar writing HOWTO
|
||||
Author: Aarne Ranta <aarne (at) cs.chalmers.se>
|
||||
Last update: %%date(%c)
|
||||
|
||||
% NOTE: this is a txt2tags file.
|
||||
% Create an html file from this file using:
|
||||
% txt2tags --toc -thtml Resource-HOWTO.txt
|
||||
|
||||
%!target:html
|
||||
|
||||
**History**
|
||||
|
||||
September 2008: updated for Version 1.5.
|
||||
|
||||
October 2007: updated for Version 1.2.
|
||||
|
||||
January 2006: first version.
|
||||
|
||||
|
||||
The purpose of this document is to tell how to implement the GF
|
||||
resource grammar API for a new language. We will //not// cover how
|
||||
to use the resource grammar, nor how to change the API. But we
|
||||
will give some hints how to extend the API.
|
||||
|
||||
A manual for using the resource grammar is found in
|
||||
|
||||
[``www.cs.chalmers.se/Cs/Research/Language-technology/GF/lib/resource/doc/synopsis.html`` ../lib/resource/doc/synopsis.html].
|
||||
|
||||
A tutorial on GF, also introducing the idea of resource grammars, is found in
|
||||
|
||||
[``www.cs.chalmers.se/Cs/Research/Language-technology/GF/doc/gf-tutorial.html`` ./gf-tutorial.html].
|
||||
|
||||
This document concerns the API v. 1.5, while the current stable release is 1.4.
|
||||
You can find the code for the stable release in
|
||||
|
||||
[``www.cs.chalmers.se/Cs/Research/Language-technology/GF/lib/resource/`` ../lib/resource]
|
||||
|
||||
and the next release in
|
||||
|
||||
[``www.cs.chalmers.se/Cs/Research/Language-technology/GF/next-lib/src/`` ../next-lib/src]
|
||||
|
||||
It is recommended to build new grammars to match the next release.
|
||||
|
||||
|
||||
|
||||
|
||||
==The resource grammar structure==
|
||||
|
||||
The library is divided into a bunch of modules, whose dependencies
|
||||
are given in the following figure.
|
||||
|
||||
[Syntax.png]
|
||||
|
||||
Modules of different kinds are distinguished as follows:
|
||||
- solid contours: module seen by end users
|
||||
- dashed contours: internal module
|
||||
- ellipse: abstract/concrete pair of modules
|
||||
- rectangle: resource or instance
|
||||
- diamond: interface
|
||||
|
||||
|
||||
Put in another way:
|
||||
- solid rectangles and diamonds: user-accessible library API
|
||||
- solid ellipses: user-accessible top-level grammar for parsing and linearization
|
||||
- dashed contours: not visible to users
|
||||
|
||||
|
||||
The dashed ellipses form the main parts of the implementation, on which the resource
|
||||
grammar programmer has to work. She also has to work on the ``Paradigms``
|
||||
module. The rest of the modules can be produced mechanically from corresponding
|
||||
modules for other languages, by just changing the language codes appearing in
|
||||
their module headers.
|
||||
|
||||
The module structure is rather flat: most modules are direct
|
||||
parents of ``Grammar``. The idea
|
||||
is that the implementors can concentrate on one linguistic aspect at a time, or
|
||||
also distribute the work among several authors. The module ``Cat``
|
||||
defines the "glue" that ties the aspects together - a type system
|
||||
to which all the other modules conform, so that e.g. ``NP`` means
|
||||
the same thing in those modules that use ``NP``s and those that
|
||||
constructs them.
|
||||
|
||||
|
||||
===Library API modules===
|
||||
|
||||
For the user of the library, these modules are the most important ones.
|
||||
In a typical application, it is enough to open ``Paradigms`` and ``Syntax``.
|
||||
The module ``Try`` combines these two, making it possible to experiment
|
||||
with combinations of syntactic and lexical constructors by using the
|
||||
``cc`` command in the GF shell. Here are short explanations of each API module:
|
||||
- ``Try``: the whole resource library for a language (``Paradigms``, ``Syntax``,
|
||||
``Irreg``, and ``Extra``);
|
||||
produced mechanically as a collection of modules
|
||||
- ``Syntax``: language-independent categories, syntax functions, and structural words;
|
||||
produced mechanically as a collection of modules
|
||||
- ``Constructors``: language-independent syntax functions and structural words;
|
||||
produced mechanically via functor instantiation
|
||||
- ``Paradigms``: language-dependent morphological paradigms
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
===Phrase category modules===
|
||||
|
||||
The immediate parents of ``Grammar`` will be called **phrase category modules**,
|
||||
since each of them concentrates on a particular phrase category (nouns, verbs,
|
||||
adjectives, sentences,...). A phrase category module tells
|
||||
//how to construct phrases in that category//. You will find out that
|
||||
all functions in any of these modules have the same value type (or maybe
|
||||
one of a small number of different types). Thus we have
|
||||
- ``Noun``: construction of nouns and noun phrases
|
||||
- ``Adjective``: construction of adjectival phrases
|
||||
- ``Verb``: construction of verb phrases
|
||||
- ``Adverb``: construction of adverbial phrases
|
||||
- ``Numeral``: construction of cardinal and ordinal numerals
|
||||
- ``Sentence``: construction of sentences and imperatives
|
||||
- ``Question``: construction of questions
|
||||
- ``Relative``: construction of relative clauses
|
||||
- ``Conjunction``: coordination of phrases
|
||||
- ``Phrase``: construction of the major units of text and speech
|
||||
- ``Text``: construction of texts as sequences of phrases
|
||||
- ``Idiom``: idiomatic expressions such as existentials
|
||||
|
||||
|
||||
|
||||
|
||||
===Infrastructure modules===
|
||||
|
||||
Expressions of each phrase category are constructed in the corresponding
|
||||
phrase category module. But their //use// takes mostly place in other modules.
|
||||
For instance, noun phrases, which are constructed in ``Noun``, are
|
||||
used as arguments of functions of almost all other phrase category modules.
|
||||
How can we build all these modules independently of each other?
|
||||
|
||||
As usual in typeful programming, the //only// thing you need to know
|
||||
about an object you use is its type. When writing a linearization rule
|
||||
for a GF abstract syntax function, the only thing you need to know is
|
||||
the linearization types of its value and argument categories. To achieve
|
||||
the division of the resource grammar to several parallel phrase category modules,
|
||||
what we need is an underlying definition of the linearization types. This
|
||||
definition is given as the implementation of
|
||||
- ``Cat``: syntactic categories of the resource grammar
|
||||
|
||||
|
||||
Any resource grammar implementation has first to agree on how to implement
|
||||
``Cat``. Luckily enough, even this can be done incrementally: you
|
||||
can skip the ``lincat`` definition of a category and use the default
|
||||
``{s : Str}`` until you need to change it to something else. In
|
||||
English, for instance, many categories do have this linearization type.
|
||||
|
||||
|
||||
|
||||
===Lexical modules===
|
||||
|
||||
What is lexical and what is syntactic is not as clearcut in GF as in
|
||||
some other grammar formalisms. Logically, lexical means atom, i.e. a
|
||||
``fun`` with no arguments. Linguistically, one may add to this
|
||||
that the ``lin`` consists of only one token (or of a table whose values
|
||||
are single tokens). Even in the restricted lexicon included in the resource
|
||||
API, the latter rule is sometimes violated in some languages. For instance,
|
||||
``Structural.both7and_DConj`` is an atom, but its linearization is
|
||||
two words e.g. //both - and//.
|
||||
|
||||
Another characterization of lexical is that lexical units can be added
|
||||
almost //ad libitum//, and they cannot be defined in terms of already
|
||||
given rules. The lexical modules of the resource API are thus more like
|
||||
samples than complete lists. There are two such modules:
|
||||
- ``Structural``: structural words (determiners, conjunctions,...)
|
||||
- ``Lexicon``: basic everyday content words (nouns, verbs,...)
|
||||
|
||||
|
||||
The module ``Structural`` aims for completeness, and is likely to
|
||||
be extended in future releases of the resource. The module ``Lexicon``
|
||||
gives a "random" list of words, which enables testing the syntax.
|
||||
It also provides a check list for morphology, since those words are likely to include
|
||||
most morphological patterns of the language.
|
||||
|
||||
In the case of ``Lexicon`` it may come out clearer than anywhere else
|
||||
in the API that it is impossible to give exact translation equivalents in
|
||||
different languages on the level of a resource grammar. This is no problem,
|
||||
since application grammars can use the resource in different ways for
|
||||
different languages.
|
||||
|
||||
|
||||
==Language-dependent syntax modules==
|
||||
|
||||
In addition to the common API, there is room for language-dependent extensions
|
||||
of the resource. The top level of each language looks as follows (with German
|
||||
as example):
|
||||
```
|
||||
abstract AllGerAbs = Lang, ExtraGerAbs, IrregGerAbs
|
||||
```
|
||||
where ``ExtraGerAbs`` is a collection of syntactic structures specific to German,
|
||||
and ``IrregGerAbs`` is a dictionary of irregular words of German
|
||||
(at the moment, just verbs). Each of these language-specific grammars has
|
||||
the potential to grow into a full-scale grammar of the language. These grammar
|
||||
can also be used as libraries, but the possibility of using functors is lost.
|
||||
|
||||
To give a better overview of language-specific structures,
|
||||
modules like ``ExtraGerAbs``
|
||||
are built from a language-independent module ``ExtraAbs``
|
||||
by restricted inheritance:
|
||||
```
|
||||
abstract ExtraGerAbs = Extra [f,g,...]
|
||||
```
|
||||
Thus any category and function in ``Extra`` may be shared by a subset of all
|
||||
languages. One can see this set-up as a matrix, which tells
|
||||
what ``Extra`` structures
|
||||
are implemented in what languages. For the common API in ``Grammar``, the matrix
|
||||
is filled with 1's (everything is implemented in every language).
|
||||
|
||||
In a minimal resource grammar implementation, the language-dependent
|
||||
extensions are just empty modules, but it is good to provide them for
|
||||
the sake of uniformity.
|
||||
|
||||
|
||||
|
||||
===The present-tense fragment===
|
||||
|
||||
Some lines in the resource library are suffixed with the comment
|
||||
```
|
||||
--# notpresent
|
||||
```
|
||||
which is used by a preprocessor to exclude those lines from
|
||||
a reduced version of the full resource. This present-tense-only
|
||||
version is useful for applications in most technical text, since
|
||||
they reduce the grammar size and compilation time. It can also
|
||||
be useful to exclude those lines in a first version of resource
|
||||
implementation. To compile a grammar with present-tense-only, use
|
||||
```
|
||||
make Present
|
||||
```
|
||||
with ``resource/Makefile``.
|
||||
|
||||
|
||||
|
||||
==Phases of the work==
|
||||
|
||||
===Putting up a directory===
|
||||
|
||||
Unless you are writing an instance of a parametrized implementation
|
||||
(Romance or Scandinavian), which will be covered later, the
|
||||
simplest way is to follow roughly the following procedure. Assume you
|
||||
are building a grammar for the German language. Here are the first steps,
|
||||
which we actually followed ourselves when building the German implementation
|
||||
of resource v. 1.0 at Ubuntu linux. We have slightly modified them to
|
||||
match resource v. 1.5 and GF v. 3.0.
|
||||
|
||||
+ Create a sister directory for ``GF/lib/resource/english``, named
|
||||
``german``.
|
||||
```
|
||||
cd GF/lib/resource/
|
||||
mkdir german
|
||||
cd german
|
||||
```
|
||||
|
||||
+ Check out the [ISO 639 3-letter language code
|
||||
http://www.w3.org/WAI/ER/IG/ert/iso639.htm]
|
||||
for German: both ``Ger`` and ``Deu`` are given, and we pick ``Ger``.
|
||||
(We use the 3-letter codes rather than the more common 2-letter codes,
|
||||
since they will suffice for many more languages!)
|
||||
|
||||
+ Copy the ``*Eng.gf`` files from ``english`` to ``german``,
|
||||
and rename them:
|
||||
```
|
||||
cp ../english/*Eng.gf .
|
||||
rename 's/Eng/Ger/' *Eng.gf
|
||||
```
|
||||
If you don't have the ``rename`` command, you can use a bash script with ``mv``.
|
||||
|
||||
|
||||
+ Change the ``Eng`` module references to ``Ger`` references
|
||||
in all files:
|
||||
```
|
||||
sed -i 's/English/German/g' *Ger.gf
|
||||
sed -i 's/Eng/Ger/g' *Ger.gf
|
||||
```
|
||||
The first line prevents changing the word ``English``, which appears
|
||||
here and there in comments, to ``Gerlish``. The ``sed`` command syntax
|
||||
may vary depending on your operating system.
|
||||
|
||||
+ This may of course change unwanted occurrences of the
|
||||
string ``Eng`` - verify this by
|
||||
```
|
||||
grep Ger *.gf
|
||||
```
|
||||
But you will have to make lots of manual changes in all files anyway!
|
||||
|
||||
+ Comment out the contents of these files:
|
||||
```
|
||||
sed -i 's/^/--/' *Ger.gf
|
||||
```
|
||||
This will give you a set of templates out of which the grammar
|
||||
will grow as you uncomment and modify the files rule by rule.
|
||||
|
||||
+ In all ``.gf`` files, uncomment the module headers and brackets,
|
||||
leaving the module bodies commented. Unfortunately, there is no
|
||||
simple way to do this automatically (or to avoid commenting these
|
||||
lines in the previous step) - but uncommenting the first
|
||||
and the last lines will actually do the job for many of the files.
|
||||
|
||||
+ Uncomment the contents of the main grammar file:
|
||||
```
|
||||
sed -i 's/^--//' LangGer.gf
|
||||
```
|
||||
|
||||
+ Now you can open the grammar ``LangGer`` in GF:
|
||||
```
|
||||
gf LangGer.gf
|
||||
```
|
||||
You will get lots of warnings on missing rules, but the grammar will compile.
|
||||
|
||||
+ At all the following steps you will now have a valid, but incomplete
|
||||
GF grammar. The GF command
|
||||
```
|
||||
pg -missing
|
||||
```
|
||||
tells you what exactly is missing.
|
||||
|
||||
|
||||
Here is the module structure of ``LangGer``. It has been simplified by leaving out
|
||||
the majority of the phrase category modules. Each of them has the same dependencies
|
||||
as ``VerbGer``, whose complete dependencies are shown as an example.
|
||||
|
||||
[German.png]
|
||||
|
||||
|
||||
===Direction of work===
|
||||
|
||||
The real work starts now. There are many ways to proceed, the most obvious ones being
|
||||
- Top-down: start from the module ``Phrase`` and go down to ``Sentence``, then
|
||||
``Verb``, ``Noun``, and in the end ``Lexicon``. In this way, you are all the time
|
||||
building complete phrases, and add them with more content as you proceed.
|
||||
**This approach is not recommended**. It is impossible to test the rules if
|
||||
you have no words to apply the constructions to.
|
||||
|
||||
- Bottom-up: set as your first goal to implement ``Lexicon``. To this end, you
|
||||
need to write ``ParadigmsGer``, which in turn needs parts of
|
||||
``MorphoGer`` and ``ResGer``.
|
||||
**This approach is not recommended**. You can get stuck to details of
|
||||
morphology such as irregular words, and you don't have enough grasp about
|
||||
the type system to decide what forms to cover in morphology.
|
||||
|
||||
|
||||
The practical working direction is thus a saw-like motion between the morphological
|
||||
and top-level modules. Here is a possible course of the work that gives enough
|
||||
test data and enough general view at any point:
|
||||
+ Define ``Cat.N`` and the required parameter types in ``ResGer``. As we define
|
||||
```
|
||||
lincat N = {s : Number => Case => Str ; g : Gender} ;
|
||||
```
|
||||
we need the parameter types ``Number``, ``Case``, and ``Gender``. The definition
|
||||
of ``Number`` in [``common/ParamX`` ../lib/resource/common/ParamX.gf]
|
||||
works for German, so we
|
||||
use it and just define ``Case`` and ``Gender`` in ``ResGer``.
|
||||
|
||||
+ Define some cases of ``mkN`` in ``ParadigmsGer``. In this way you can
|
||||
already implement a huge amount of nouns correctly in ``LexiconGer``. Actually
|
||||
just adding the worst-case instance of ``mkN`` (the one taking the most
|
||||
arguments) should suffice for every noun - but,
|
||||
since it is tedious to use, you
|
||||
might proceed to the next step before returning to morphology and defining the
|
||||
real work horse, ``mkN`` taking two forms and a gender.
|
||||
|
||||
+ While doing this, you may want to test the resource independently. Do this by
|
||||
starting the GF shell in the ``resource`` directory, by the commands
|
||||
```
|
||||
> i -retain german/ParadigmsGer
|
||||
> cc -table mkN "Kirche"
|
||||
```
|
||||
|
||||
+ Proceed to determiners and pronouns in
|
||||
``NounGer`` (``DetCN UsePron DetQuant NumSg DefArt IndefArt UseN``) and
|
||||
``StructuralGer`` (``i_Pron this_Quant``). You also need some categories and
|
||||
parameter types. At this point, it is maybe not possible to find out the final
|
||||
linearization types of ``CN``, ``NP``, ``Det``, and ``Quant``, but at least you should
|
||||
be able to correctly inflect noun phrases such as //every airplane//:
|
||||
```
|
||||
> i german/LangGer.gf
|
||||
> l -table DetCN every_Det (UseN airplane_N)
|
||||
|
||||
Nom: jeder Flugzeug
|
||||
Acc: jeden Flugzeug
|
||||
Dat: jedem Flugzeug
|
||||
Gen: jedes Flugzeugs
|
||||
```
|
||||
|
||||
+ Proceed to verbs: define ``CatGer.V``, ``ResGer.VForm``, and
|
||||
``ParadigmsGer.mkV``. You may choose to exclude ``notpresent``
|
||||
cases at this point. But anyway, you will be able to inflect a good
|
||||
number of verbs in ``Lexicon``, such as
|
||||
``live_V`` (``mkV "leben"``).
|
||||
|
||||
+ Now you can soon form your first sentences: define ``VP`` and
|
||||
``Cl`` in ``CatGer``, ``VerbGer.UseV``, and ``SentenceGer.PredVP``.
|
||||
Even if you have excluded the tenses, you will be able to produce
|
||||
```
|
||||
> i -preproc=./mkPresent german/LangGer.gf
|
||||
> l -table PredVP (UsePron i_Pron) (UseV live_V)
|
||||
|
||||
Pres Simul Pos Main: ich lebe
|
||||
Pres Simul Pos Inv: lebe ich
|
||||
Pres Simul Pos Sub: ich lebe
|
||||
Pres Simul Neg Main: ich lebe nicht
|
||||
Pres Simul Neg Inv: lebe ich nicht
|
||||
Pres Simul Neg Sub: ich nicht lebe
|
||||
```
|
||||
You should also be able to parse:
|
||||
```
|
||||
> p -cat=Cl "ich lebe"
|
||||
PredVP (UsePron i_Pron) (UseV live_V)
|
||||
```
|
||||
|
||||
+ Transitive verbs
|
||||
(``CatGer.V2 CatGer.VPSlash ParadigmsGer.mkV2 VerbGer.ComplSlash VerbGer.SlashV2a``)
|
||||
are a natural next step, so that you can
|
||||
produce ``ich liebe dich`` ("I love you").
|
||||
|
||||
+ Adjectives (``CatGer.A ParadigmsGer.mkA NounGer.AdjCN AdjectiveGer.PositA``)
|
||||
will force you to think about strong and weak declensions, so that you can
|
||||
correctly inflect //mein neuer Wagen, dieser neue Wagen//
|
||||
("my new car, this new car").
|
||||
|
||||
+ Once you have implemented the set
|
||||
(``Noun.DetCN Noun.AdjCN Verb.UseV Verb.ComplSlash Verb.SlashV2a Sentence.PredVP``),
|
||||
you have overcome most of difficulties. You know roughly what parameters
|
||||
and dependences there are in your language, and you can now proceed very
|
||||
much in the order you please.
|
||||
|
||||
|
||||
|
||||
===The develop-test cycle===
|
||||
|
||||
The following develop-test cycle will
|
||||
be applied most of the time, both in the first steps described above
|
||||
and in later steps where you are more on your own.
|
||||
|
||||
+ Select a phrase category module, e.g. ``NounGer``, and uncomment some
|
||||
linearization rules (for instance, ``DetCN``, as above).
|
||||
|
||||
+ Write down some German examples of this rule, for instance translations
|
||||
of "the dog", "the house", "the big house", etc. Write these in all their
|
||||
different forms (two numbers and four cases).
|
||||
|
||||
+ Think about the categories involved (``CN, NP, N, Det``) and the
|
||||
variations they have. Encode this in the lincats of ``CatGer``.
|
||||
You may have to define some new parameter types in ``ResGer``.
|
||||
|
||||
+ To be able to test the construction,
|
||||
define some words you need to instantiate it
|
||||
in ``LexiconGer``. You will also need some regular inflection patterns
|
||||
in ``ParadigmsGer``.
|
||||
|
||||
+ Test by parsing, linearization,
|
||||
and random generation. In particular, linearization to a table should
|
||||
be used so that you see all forms produced; the ``treebank`` option
|
||||
preserves the tree
|
||||
```
|
||||
> gr -cat=NP -number=20 | l -table -treebank
|
||||
```
|
||||
|
||||
+ Save some tree-linearization pairs for later regression testing. You can save
|
||||
a gold standard treebank and use the Unix ``diff`` command to compare later
|
||||
linearizations produced from the same list of trees. If you save the trees
|
||||
in a file ``trees``, you can do as follows:
|
||||
```
|
||||
> rf -file=trees -tree -lines | l -table -treebank | wf -file=treebank
|
||||
```
|
||||
|
||||
+ A file with trees testing all resource functions is included in the resource,
|
||||
entitled ``resource/exx-resource.gft``. A treebank can be created from this by
|
||||
the Unix command
|
||||
```
|
||||
% runghc Make.hs test langs=Ger
|
||||
```
|
||||
|
||||
|
||||
|
||||
You are likely to run this cycle a few times for each linearization rule
|
||||
you implement, and some hundreds of times altogether. There are roughly
|
||||
70 ``cat``s and
|
||||
600 ``funs`` in ``Lang`` at the moment (170 of the ``funs`` are outside the two
|
||||
lexicon modules).
|
||||
|
||||
|
||||
===Auxiliary modules===
|
||||
|
||||
These auxiliary ``resource`` modules will be written by you.
|
||||
|
||||
- ``ResGer``: parameter types and auxiliary operations
|
||||
(a resource for the resource grammar!)
|
||||
- ``ParadigmsGer``: complete inflection engine and most important regular paradigms
|
||||
- ``MorphoGer``: auxiliaries for ``ParadigmsGer`` and ``StructuralGer``. This need
|
||||
not be separate from ``ResGer``.
|
||||
|
||||
|
||||
These modules are language-independent and provided by the existing resource
|
||||
package.
|
||||
|
||||
- ``ParamX``: parameter types used in many languages
|
||||
- ``CommonX``: implementation of language-uniform categories
|
||||
such as $Text$ and $Phr$, as well as of
|
||||
the logical tense, anteriority, and polarity parameters
|
||||
- ``Coordination``: operations to deal with lists and coordination
|
||||
- ``Prelude``: general-purpose operations on strings, records,
|
||||
truth values, etc.
|
||||
- ``Predef``: general-purpose operations with hard-coded definitions
|
||||
|
||||
|
||||
An important decision is what rules to implement in terms of operations in
|
||||
``ResGer``. The **golden rule of functional programming** says:
|
||||
- //Whenever you find yourself programming by copy and paste, write a function instead!//.
|
||||
|
||||
|
||||
This rule suggests that an operation should be created if it is to be
|
||||
used at least twice. At the same time, a sound principle of **vicinity** says:
|
||||
- //It should not require too much browsing to understand what a piece of code does.//
|
||||
|
||||
|
||||
From these two principles, we have derived the following practice:
|
||||
- If an operation is needed //in two different modules//,
|
||||
it should be created as an ``oper`` in ``ResGer``. An example is ``mkClause``,
|
||||
used in ``Sentence``, ``Question``, and ``Relative``.
|
||||
- If an operation is needed //twice in the same module//, but never
|
||||
outside, it should be created in the same module. Many examples are
|
||||
found in ``Numerals``.
|
||||
- If an operation is needed //twice in the same judgement//, but never
|
||||
outside, it should be created by a ``let`` definition.
|
||||
- If an operation is only needed once, it should not be created as an ``oper``,
|
||||
but rather inlined. However, a ``let`` definition may well be in place just
|
||||
to make the code readable.
|
||||
Most functions in phrase category modules
|
||||
are implemented in this way.
|
||||
|
||||
|
||||
This discipline is very different from the one followed in early
|
||||
versions of the library (up to 0.9). We then valued the principle of
|
||||
abstraction more than vicinity, creating layers of abstraction for
|
||||
almost everything. This led in practice to the duplication of almost
|
||||
all code on the ``lin`` and ``oper`` levels, and made the code
|
||||
hard to understand and maintain.
|
||||
|
||||
|
||||
|
||||
===Morphology and lexicon===
|
||||
|
||||
The paradigms needed to implement
|
||||
``LexiconGer`` are defined in
|
||||
``ParadigmsGer``.
|
||||
This module provides high-level ways to define the linearization of
|
||||
lexical items, of categories ``N, A, V`` and their complement-taking
|
||||
variants.
|
||||
|
||||
For ease of use, the ``Paradigms`` modules follow a certain
|
||||
naming convention. Thus they provide, for each lexical category such as ``N``,
|
||||
the overloaded functions, such as ``mkN``, with the following cases:
|
||||
|
||||
- the worst-case construction of ``N``. Its type signature
|
||||
has the form
|
||||
```
|
||||
mkN : Str -> ... -> Str -> P -> ... -> Q -> N
|
||||
```
|
||||
with as many string and parameter arguments as can ever be needed to
|
||||
construct an ``N``.
|
||||
- the most regular cases, with just one string argument:
|
||||
```
|
||||
mkN : Str -> N
|
||||
```
|
||||
- A language-dependent (small) set of functions to handle mild irregularities
|
||||
and common exceptions.
|
||||
|
||||
|
||||
For the complement-taking variants, such as ``V2``, we provide
|
||||
- a case that takes a ``V`` and all necessary arguments, such
|
||||
as case and preposition:
|
||||
```
|
||||
mkV2 : V -> Case -> Str -> V2 ;
|
||||
```
|
||||
- a case that takes a ``Str`` and produces a transitive verb with the direct
|
||||
object case:
|
||||
```
|
||||
mkV2 : Str -> V2 ;
|
||||
```
|
||||
- A language-dependent (small) set of functions to handle common special cases,
|
||||
such as transitive verbs that are not regular:
|
||||
```
|
||||
mkV2 : V -> V2 ;
|
||||
```
|
||||
|
||||
|
||||
The golden rule for the design of paradigms is that
|
||||
- //The user of the library will only need function applications with constants and strings, never any records or tables.//
|
||||
|
||||
|
||||
The discipline of data abstraction moreover requires that the user of the resource
|
||||
is not given access to parameter constructors, but only to constants that denote
|
||||
them. This gives the resource grammarian the freedom to change the underlying
|
||||
data representation if needed. It means that the ``ParadigmsGer`` module has
|
||||
to define constants for those parameter types and constructors that
|
||||
the application grammarian may need to use, e.g.
|
||||
```
|
||||
oper
|
||||
Case : Type ;
|
||||
nominative, accusative, genitive, dative : Case ;
|
||||
```
|
||||
These constants are defined in terms of parameter types and constructors
|
||||
in ``ResGer`` and ``MorphoGer``, which modules are not
|
||||
visible to the application grammarian.
|
||||
|
||||
|
||||
===Lock fields===
|
||||
|
||||
An important difference between ``MorphoGer`` and
|
||||
``ParadigmsGer`` is that the former uses "raw" record types
|
||||
for word classes, whereas the latter uses category symbols defined in
|
||||
``CatGer``. When these category symbols are used to denote
|
||||
record types in a resource modules, such as ``ParadigmsGer``,
|
||||
a **lock field** is added to the record, so that categories
|
||||
with the same implementation are not confused with each other.
|
||||
(This is inspired by the ``newtype`` discipline in Haskell.)
|
||||
For instance, the lincats of adverbs and conjunctions are the same
|
||||
in ``CommonX`` (and therefore in ``CatGer``, which inherits it):
|
||||
```
|
||||
lincat Adv = {s : Str} ;
|
||||
lincat Conj = {s : Str} ;
|
||||
```
|
||||
But when these category symbols are used to denote their linearization
|
||||
types in resource module, these definitions are translated to
|
||||
```
|
||||
oper Adv : Type = {s : Str ; lock_Adv : {}} ;
|
||||
oper Conj : Type = {s : Str ; lock_Conj : {}} ;
|
||||
```
|
||||
In this way, the user of a resource grammar cannot confuse adverbs with
|
||||
conjunctions. In other words, the lock fields force the type checker
|
||||
to function as grammaticality checker.
|
||||
|
||||
When the resource grammar is ``open``ed in an application grammar, the
|
||||
lock fields are never seen (except possibly in type error messages),
|
||||
and the application grammarian should never write them herself. If she
|
||||
has to do this, it is a sign that the resource grammar is incomplete, and
|
||||
the proper way to proceed is to fix the resource grammar.
|
||||
|
||||
The resource grammarian has to provide the dummy lock field values
|
||||
in her hidden definitions of constants in ``Paradigms``. For instance,
|
||||
```
|
||||
mkAdv : Str -> Adv ;
|
||||
-- mkAdv s = {s = s ; lock_Adv = <>} ;
|
||||
```
|
||||
|
||||
|
||||
===Lexicon construction===
|
||||
|
||||
The lexicon belonging to ``LangGer`` consists of two modules:
|
||||
- ``StructuralGer``, structural words, built by using both
|
||||
``ParadigmsGer`` and ``MorphoGer``.
|
||||
- ``LexiconGer``, content words, built by using ``ParadigmsGer`` only.
|
||||
|
||||
|
||||
The reason why ``MorphoGer`` has to be used in ``StructuralGer``
|
||||
is that ``ParadigmsGer`` does not contain constructors for closed
|
||||
word classes such as pronouns and determiners. The reason why we
|
||||
recommend ``ParadigmsGer`` for building ``LexiconGer`` is that
|
||||
the coverage of the paradigms gets thereby tested and that the
|
||||
use of the paradigms in ``LexiconGer`` gives a good set of examples for
|
||||
those who want to build new lexica.
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
==Lexicon extension==
|
||||
|
||||
===The irregularity lexicon===
|
||||
|
||||
It is useful in most languages to provide a separate module of irregular
|
||||
verbs and other words which are difficult for a lexicographer
|
||||
to handle. There are usually a limited number of such words - a
|
||||
few hundred perhaps. Building such a lexicon separately also
|
||||
makes it less important to cover //everything// by the
|
||||
worst-case variants of the paradigms ``mkV`` etc.
|
||||
|
||||
|
||||
|
||||
===Lexicon extraction from a word list===
|
||||
|
||||
You can often find resources such as lists of
|
||||
irregular verbs on the internet. For instance, the
|
||||
Irregular German Verb page
|
||||
previously found in
|
||||
``http://www.iee.et.tu-dresden.de/~wernerr/grammar/verben_dt.html``
|
||||
page gives a list of verbs in the
|
||||
traditional tabular format, which begins as follows:
|
||||
```
|
||||
backen (du bäckst, er bäckt) backte [buk] gebacken
|
||||
befehlen (du befiehlst, er befiehlt; befiehl!) befahl (beföhle; befähle) befohlen
|
||||
beginnen begann (begönne; begänne) begonnen
|
||||
beißen biß gebissen
|
||||
```
|
||||
All you have to do is to write a suitable verb paradigm
|
||||
```
|
||||
irregV : (x1,_,_,_,_,x6 : Str) -> V ;
|
||||
```
|
||||
and a Perl or Python or Haskell script that transforms
|
||||
the table to
|
||||
```
|
||||
backen_V = irregV "backen" "bäckt" "back" "backte" "backte" "gebacken" ;
|
||||
befehlen_V = irregV "befehlen" "befiehlt" "befiehl" "befahl" "beföhle" "befohlen" ;
|
||||
```
|
||||
|
||||
When using ready-made word lists, you should think about
|
||||
copyright issues. All resource grammar material should
|
||||
be provided under GNU Lesser General Public License (LGPL).
|
||||
|
||||
|
||||
|
||||
===Lexicon extraction from raw text data===
|
||||
|
||||
This is a cheap technique to build a lexicon of thousands
|
||||
of words, if text data is available in digital format.
|
||||
See the [Extract Homepage http://www.cs.chalmers.se/~markus/extract/]
|
||||
homepage for details.
|
||||
|
||||
|
||||
===Bootstrapping with smart paradigms===
|
||||
|
||||
This is another cheap technique, where you need as input a list of words with
|
||||
part-of-speech marking. You initialize the lexicon by using the one-argument
|
||||
``mkN`` etc paradigms, and add forms to those words that do not come out right.
|
||||
This procedure is described in the paper
|
||||
|
||||
A. Ranta.
|
||||
How predictable is Finnish morphology? An experiment on lexicon construction.
|
||||
In J. Nivre, M. Dahllöf and B. Megyesi (eds),
|
||||
//Resourceful Language Technology: Festschrift in Honor of Anna Sågvall Hein//,
|
||||
University of Uppsala,
|
||||
2008.
|
||||
Available from the [series homepage http://publications.uu.se/abstract.xsql?dbid=8933]
|
||||
|
||||
|
||||
|
||||
|
||||
==Extending the resource grammar API==
|
||||
|
||||
Sooner or later it will happen that the resource grammar API
|
||||
does not suffice for all applications. A common reason is
|
||||
that it does not include idiomatic expressions in a given language.
|
||||
The solution then is in the first place to build language-specific
|
||||
extension modules, like ``ExtraGer``.
|
||||
|
||||
==Using parametrized modules==
|
||||
|
||||
===Writing an instance of parametrized resource grammar implementation===
|
||||
|
||||
Above we have looked at how a resource implementation is built by
|
||||
the copy and paste method (from English to German), that is, formally
|
||||
speaking, from scratch. A more elegant solution available for
|
||||
families of languages such as Romance and Scandinavian is to
|
||||
use parametrized modules. The advantages are
|
||||
- theoretical: linguistic generalizations and insights
|
||||
- practical: maintainability improves with fewer components
|
||||
|
||||
|
||||
Here is a set of
|
||||
[slides http://www.cs.chalmers.se/~aarne/geocal2006.pdf]
|
||||
on the topic.
|
||||
|
||||
|
||||
===Parametrizing a resource grammar implementation===
|
||||
|
||||
This is the most demanding form of resource grammar writing.
|
||||
We do //not// recommend the method of parametrizing from the
|
||||
beginning: it is easier to have one language first implemented
|
||||
in the conventional way and then add another language of the
|
||||
same family by parametrization. This means that the copy and
|
||||
paste method is still used, but at this time the differences
|
||||
are put into an ``interface`` module.
|
||||
|
||||
|
||||
==Character encoding and transliterations==
|
||||
|
||||
This section is relevant for languages using a non-ASCII character set.
|
||||
|
||||
==Coding conventions in GF==
|
||||
|
||||
From version 3.0, GF follows a simple encoding convention:
|
||||
- GF source files may follow any encoding, such as isolatin-1 or UTF-8;
|
||||
the default is isolatin-1, and UTF-8 must be indicated by the judgement
|
||||
```
|
||||
flags coding = utf8 ;
|
||||
```
|
||||
in each source module.
|
||||
- for internal processing, all characters are converted to 16-bit unicode,
|
||||
as the first step of grammar compilation guided by the ``coding`` flag
|
||||
- as the last step of compilation, all characters are converted to UTF-8
|
||||
- thus, GF object files (``gfo``) and the Portable Grammar Format (``pgf``)
|
||||
are in UTF-8
|
||||
|
||||
|
||||
Most current resource grammars use isolatin-1 in the source, but this does
|
||||
not affect their use in parallel with grammars written in other encodings.
|
||||
In fact, a grammar can be put up from modules using different codings.
|
||||
|
||||
**Warning**. While string literals may contain any characters, identifiers
|
||||
must be isolatin-1 letters (or digits, underscores, or dashes). This has to
|
||||
do with the restrictions of the lexer tool that is used.
|
||||
|
||||
|
||||
==Transliterations==
|
||||
|
||||
While UTF-8 is well supported by most web browsers, its use in terminals and
|
||||
text editors may cause disappointment. Many grammarians therefore prefer to
|
||||
use ASCII transliterations. GF 3.0beta2 provides the following built-in
|
||||
transliterations:
|
||||
- Arabic
|
||||
- Devanagari (Hindi)
|
||||
- Thai
|
||||
|
||||
|
||||
New transliterations can be defined in the GF source file
|
||||
[``GF/Text/Transliterations.hs`` ../src/GF/Text/Transliterations.hs].
|
||||
This file also gives instructions on how new ones are added.
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
BIN
doc/Syntax.png
|
Before Width: | Height: | Size: 102 KiB |
231
doc/TODO
@@ -1,231 +0,0 @@
|
||||
|
||||
* Some notes on the syntax of this file, making it possible to use todoo-mode.el:
|
||||
|
||||
- Items start with "* "
|
||||
- Sub-items start with "- "
|
||||
- It should be noted somewhere in the item, who has reported the item
|
||||
Suggestion: Add "[who]" at the beginning of the item title
|
||||
(then one can use "assign item" in todoo-mode)
|
||||
- Each item should have a priority
|
||||
Suggestion: Add "URGENT", "IMPORTANT" or "WISH" at the beginning of
|
||||
the item title
|
||||
- Sort the items in priority order
|
||||
(todoo-mode can move an item up or down)
|
||||
|
||||
----------------------------------------------------------------------
|
||||
|
||||
|
||||
* [peb] URGENT: Error messages for syntax errors
|
||||
|
||||
When a syntax error is reported, it should be noted which file it
|
||||
is. Otherwise it is impossible to know where the error is
|
||||
(if one uses the -s flag):
|
||||
|
||||
> i -s Domain/MP3/Domain_MP_Semantics.gf
|
||||
syntax error at line 33 before ve , Proposition ,
|
||||
|
||||
There's no problem with other kinds of errors:
|
||||
|
||||
> i -s Domain/MP3/Domain_MP_Semantics.gf
|
||||
checking module Godis_Semantics
|
||||
Happened in linearization of userMove :
|
||||
product expected instead of {
|
||||
pl : Str
|
||||
}
|
||||
|
||||
|
||||
* [peb] IMPORTANT: Add the -path of a module to daughter modules
|
||||
|
||||
Then the main module does not have to know where all grandchildren are:
|
||||
|
||||
file A.gf:
|
||||
abstract A = B ** {...}
|
||||
|
||||
file B.gf:
|
||||
--# -path=./resource
|
||||
abstract B = Lang ** {...}
|
||||
|
||||
I.e.: the file A.gf should not need to know that B.gf uses the
|
||||
resource library.
|
||||
|
||||
|
||||
* [peb] IMPORTANT: incomplete concrete and interfaces
|
||||
|
||||
- The following works in GF:
|
||||
|
||||
incomplete concrete TestDI of TestA = open (C=TestCI) in {
|
||||
lincat A = TestCI.A ** {p : Str};
|
||||
lin f = TestCI.f ** {p = "f"};
|
||||
g = TestCI.g ** {p = "g"};
|
||||
}
|
||||
|
||||
> i -src TestDE.gf
|
||||
|
||||
- BUT, if we exchange "TestCI" for "C" we get an error:
|
||||
|
||||
incomplete concrete TestDI of TestA = open (C=TestCI) in {
|
||||
lincat A = C.A ** {p : Str};
|
||||
lin f = C.f ** {p = "f"};
|
||||
g = C.g ** {p = "g"};
|
||||
}
|
||||
|
||||
> i -src TestDE.gf
|
||||
compiling TestDE.gf... failed to find C
|
||||
OCCURRED IN
|
||||
atomic term C given TestCE TestCI TestCE TestDE
|
||||
OCCURRED IN
|
||||
renaming definition of f
|
||||
OCCURRED IN
|
||||
renaming module TestDE
|
||||
|
||||
- the other modules:
|
||||
|
||||
abstract TestA = {
|
||||
cat A;
|
||||
fun f, g : A;
|
||||
}
|
||||
|
||||
instance TestBE of TestBI = {
|
||||
oper hello = "hello";
|
||||
bye = "bye";
|
||||
}
|
||||
|
||||
interface TestBI = {
|
||||
oper hello : Str;
|
||||
bye : Str;
|
||||
}
|
||||
|
||||
concrete TestCE of TestA = TestCI with (TestBI = TestBE);
|
||||
|
||||
incomplete concrete TestCI of TestA = open TestBI in {
|
||||
lincat A = {s : Str};
|
||||
lin f = {s = hello};
|
||||
g = {s = bye};
|
||||
}
|
||||
|
||||
concrete TestDE of TestA = TestDI with (TestCI = TestCE);
|
||||
|
||||
* [peb] IMPORTANT: Missing things in the help command
|
||||
|
||||
> h -printer
|
||||
(the flag -printer=cfgm is missing)
|
||||
|
||||
> h -cat
|
||||
WARNING: invalid option: cat
|
||||
|
||||
> h -lang
|
||||
WARNING: invalid option: lang
|
||||
|
||||
> h -language
|
||||
WARNING: invalid option: language
|
||||
|
||||
> h -parser
|
||||
WARNING: invalid option: parser
|
||||
|
||||
> h -aslkdjaslkdjss
|
||||
WARNING: invalid option: aslkdjaslkdjss
|
||||
Command not found.
|
||||
(it should note: "option not found")
|
||||
|
||||
> h -optimize
|
||||
WARNING: invalid option: optimize
|
||||
|
||||
> h -startcat
|
||||
WARNING: invalid option: startcat
|
||||
|
||||
> h h
|
||||
h, help: h Command?
|
||||
(it should also mention "h -option")
|
||||
|
||||
|
||||
* [peb] IMPORTANT: Set GF_LIb-PATH within GF
|
||||
|
||||
> sf libpath=~/GF/lib
|
||||
|
||||
|
||||
* [peb] IMPORTANT: Set the starting category with "sf"
|
||||
|
||||
> sf startcat=X
|
||||
|
||||
|
||||
* [peb] IMPORTANT: import-flags
|
||||
|
||||
- There are some inconsistencies when importing grammars:
|
||||
|
||||
1. when doing "pg -printer=cfg", one must have used "i -conversion=finite",
|
||||
since "pg" doesn't care about the flags that are set in the grammar file
|
||||
|
||||
2. when doing "pm -printer=cfgm", one must have set the flag
|
||||
"conversion=finite" within the grammar file, since "pm" doesn't
|
||||
care about the flags to the import command
|
||||
|
||||
(I guess it's me (peb) who should fix this, but I don't know where
|
||||
the different flags reside...)
|
||||
|
||||
- Also, it must be decided in what cases flags can override other flags:
|
||||
|
||||
a) in the grammar file, e.g. "flags conversion=finite;"
|
||||
b) on the command line, e.g. "> sf conversion=finite"
|
||||
c) as argument to a command, e.g. "> i -conversion=finite file.gf"
|
||||
|
||||
- A related issue is to decide the scope of flags:
|
||||
|
||||
Some flags are (or should be) local to the module
|
||||
(e.g. -coding and -path)
|
||||
Other flags override daughter flags for daughter modules
|
||||
(e.g. -startcat and -conversion)
|
||||
|
||||
* [bringert] IMPORTANT: get right startcat flag when printing CFGM
|
||||
GF.CFGM.PrintCFGrammar.prCanonAsCFGM currently only gets the startcat
|
||||
flag from the top-level concrete module. This might be easier
|
||||
to fix if the multi grammar printers had access to more than just
|
||||
the CanonGrammar.
|
||||
|
||||
* [peb] WISH: generalizing incomplete concrete
|
||||
|
||||
I want to be able to open an incomplete concrete module
|
||||
inside another incomplete concrete.
|
||||
Then I can instantiate both incompletes at the same time.
|
||||
|
||||
* [peb] WISH: _tmpi, _tmpo
|
||||
|
||||
The files _tmpi and _tmpo are never removed when quitting GF.
|
||||
Further suggestion: put them in /tmp or similar.
|
||||
|
||||
peb: när man använder "|" till ett systemanrop, t.ex:
|
||||
pg | ! sort
|
||||
så skapas filerna _tmpi och _tmpo. Men de tas aldrig bort.
|
||||
|
||||
peb: Ännu bättre: ta bort filerna efteråt.
|
||||
|
||||
aarne: Sant: när GF quittas (om detta inte sker onormalt).
|
||||
Eller när kommandot har kört färdigt (om det terminerar).
|
||||
|
||||
peb: Bäst(?): skapa filerna i /tmp eller liknande.
|
||||
|
||||
aarne: Ibland får man skrivrättighetsproblem - och det är
|
||||
inte kul om man måste ange en tmp-path. Och olika
|
||||
användare och gf-processer måste ha unika filnamn.
|
||||
Och vet inte hur det funkar på windows...
|
||||
|
||||
aarne: Ett till alternativ skulle vara att använda handles
|
||||
utan några tmp-filer alls. Men jag har inte hunnit
|
||||
ta reda på hur det går till.
|
||||
|
||||
björn: Lite slumpmässiga tankar:
|
||||
+ man kan använda System.Directory.getTemporaryDirectory, så slipper man iaf bry sig om olika plattformsproblem.
|
||||
+ sen kan man använda System.IO.openTempFile för att skapa en temporär fil. Den tas dock inte bort när programmet avslutas, så det får man fixa själv.
|
||||
+ System.Posix.Temp.mkstemp gör nåt liknande, men dokumentationen är dålig.
|
||||
+ biblioteket HsShellScript har lite funktioner för sånt här, se
|
||||
http://www.volker-wysk.de/hsshellscript/apidoc/HsShellScript.html#16
|
||||
|
||||
|
||||
* [peb] WISH: Hierarchic modules
|
||||
|
||||
Suggestion by peb:
|
||||
The module A.B.C is located in the file A/B/C.gf
|
||||
|
||||
Main advantage: you no longer need to state "--# -path=..." in
|
||||
modules
|
||||
|
||||
- How can this be combined with several modules inside one file?
|
||||
@@ -1,750 +0,0 @@
|
||||
Compiling GF
|
||||
Aarne Ranta
|
||||
Proglog meeting, 1 November 2006
|
||||
|
||||
% to compile: txt2tags -thtml compiling-gf.txt ; htmls compiling-gf.html
|
||||
|
||||
%!target:html
|
||||
%!postproc(html): #NEW <!-- NEW -->
|
||||
|
||||
#NEW
|
||||
|
||||
==The compilation task==
|
||||
|
||||
GF is a grammar formalism, i.e. a special purpose programming language
|
||||
for writing grammars.
|
||||
|
||||
Other grammar formalisms:
|
||||
- BNF, YACC, Happy (grammars for programming languages);
|
||||
- PATR, HPSG, LFG (grammars for natural languages).
|
||||
|
||||
|
||||
The grammar compiler prepares a GF grammar for two computational tasks:
|
||||
- linearization: take syntax trees to strings
|
||||
- parsing: take strings to syntax trees
|
||||
|
||||
|
||||
The grammar gives a declarative description of these functionalities,
|
||||
on a high abstraction level that improves grammar writing
|
||||
productivity.
|
||||
|
||||
For efficiency, the grammar is compiled to lower-level formats.
|
||||
|
||||
Type checking is another essential compilation phase. Its purpose is
|
||||
twofold, as usual:
|
||||
- checking the correctness of the grammar
|
||||
- type-annotating expressions for code generation
|
||||
|
||||
|
||||
#NEW
|
||||
|
||||
==Characteristics of GF language==
|
||||
|
||||
Functional language with types, both built-in and user-defined.
|
||||
```
|
||||
Str : Type
|
||||
|
||||
param Number = Sg | Pl
|
||||
|
||||
param AdjForm = ASg Gender | APl
|
||||
|
||||
Noun : Type = {s : Number => Str ; g : Gender}
|
||||
```
|
||||
Pattern matching.
|
||||
```
|
||||
svart_A = table {
|
||||
ASg _ => "svart" ;
|
||||
_ => "svarta"
|
||||
}
|
||||
```
|
||||
Higher-order functions.
|
||||
|
||||
Dependent types.
|
||||
```
|
||||
flip : (a, b, c : Type) -> (a -> b -> c) -> b -> a -> c =
|
||||
\_,_,_,f,y,x -> f x y ;
|
||||
```
|
||||
|
||||
|
||||
#NEW
|
||||
|
||||
==The module system of GF==
|
||||
|
||||
Main division: abstract syntax and concrete syntax
|
||||
```
|
||||
abstract Greeting = {
|
||||
cat Greet ;
|
||||
fun Hello : Greet ;
|
||||
}
|
||||
|
||||
concrete GreetingEng of Greeting = {
|
||||
lincat Greet = {s : Str} ;
|
||||
lin Hello = {s = "hello"} ;
|
||||
}
|
||||
|
||||
concrete GreetingIta of Greeting = {
|
||||
param Politeness = Familiar | Polite ;
|
||||
lincat Greet = {s : Politeness => Str} ;
|
||||
lin Hello = {s = table {
|
||||
Familiar => "ciao" ;
|
||||
Polite => "buongiorno"
|
||||
} ;
|
||||
}
|
||||
```
|
||||
Other features of the module system:
|
||||
- extension and opening
|
||||
- parametrized modules (cf. ML: signatures, structures, functors)
|
||||
|
||||
|
||||
|
||||
|
||||
#NEW
|
||||
|
||||
==GF vs. Haskell==
|
||||
|
||||
Some things that (standard) Haskell hasn't:
|
||||
- records and record subtyping
|
||||
- regular expression patterns
|
||||
- dependent types
|
||||
- ML-style modules
|
||||
|
||||
|
||||
Some things that GF hasn't:
|
||||
- infinite (recursive) data types
|
||||
- recursive functions
|
||||
- classes, polymorphism
|
||||
|
||||
|
||||
#NEW
|
||||
|
||||
==GF vs. most linguistic grammar formalisms==
|
||||
|
||||
GF separates abstract syntax from concrete syntax.
|
||||
|
||||
GF has a module system with separate compilation.
|
||||
|
||||
GF is generation-oriented (as opposed to parsing).
|
||||
|
||||
GF has unidirectional matching (as opposed to unification).
|
||||
|
||||
GF has a static type system (as opposed to a type-free universe).
|
||||
|
||||
"I was - and I still am - firmly convinced that a program composed
|
||||
out of statically type-checked parts is more likely to faithfully
|
||||
express a well-thought-out design than a program relying on
|
||||
weakly-typed interfaces or dynamically-checked interfaces."
|
||||
(B. Stroustrup, 1994, p. 107)
|
||||
|
||||
|
||||
|
||||
#NEW
|
||||
|
||||
==The computation model: abstract syntax==
|
||||
|
||||
An abstract syntax defines a free algebra of trees (using
|
||||
dependent types, recursion, higher-order abstract syntax:
|
||||
GF includes a complete Logical Framework).
|
||||
```
|
||||
cat C (x_1 : A_1)...(x_n : A_n)
|
||||
a_1 : A_1
|
||||
...
|
||||
a_n : A_n{x_1 : A_1,...,x_n-1 : A_n-1}
|
||||
----------------------------------------------------
|
||||
(C a_1 ... a_n) : Type
|
||||
|
||||
|
||||
fun f : (x_1 : A_1) -> ... -> (x_n : A_n) -> A
|
||||
a_1 : A_1
|
||||
...
|
||||
a_n : A_n{x_1 : A_1,...,x_n-1 : A_n-1}
|
||||
----------------------------------------------------
|
||||
(f a_1 ... a_n) : A{x_1 : A_1,...,x_n : A_n}
|
||||
|
||||
|
||||
A : Type x : A |- B : Type x : A |- b : B f : (x : A) -> B a : A
|
||||
---------------------------- ---------------------- ------------------------
|
||||
(x : A) -> B : Type    \x -> b : (x : A) -> B         f a : B{x := a}
|
||||
```
|
||||
Notice that all syntax trees are in eta-long form.
|
||||
|
||||
|
||||
#NEW
|
||||
|
||||
==The computation model: concrete syntax==
|
||||
|
||||
A concrete syntax defines a homomorphism (compositional mapping)
|
||||
from the abstract syntax to a system of concrete syntax objects.
|
||||
```
|
||||
cat C _
|
||||
--------------------
|
||||
lincat C = C* : Type
|
||||
|
||||
fun f : (x_1 : A_1) -> ... -> (x_n : A_n) -> A
|
||||
-----------------------------------------------
|
||||
lin f = f* : A_1* -> ... -> A_n* -> A*
|
||||
|
||||
(f a_1 ... a_n)* = f* a_1* ... a_n*
|
||||
```
|
||||
The homomorphism can as such be used as linearization function.
|
||||
|
||||
It is a functional program, but a restricted one, since it works
|
||||
in the end on finite data structures only.
|
||||
|
||||
But a more efficient program is obtained via compilation to
|
||||
GFC = Canonical GF: the "machine code" of GF.
|
||||
|
||||
The parsing problem of GFC can be reduced to that of MPCFG (Multiple
|
||||
Parallel Context Free Grammars), see P. Ljunglöf's thesis (2004).
|
||||
|
||||
|
||||
|
||||
#NEW
|
||||
|
||||
==The core type system of concrete syntax: basic types==
|
||||
|
||||
```
|
||||
param P P : PType
|
||||
PType : Type --------- ---------
|
||||
P : PType P : Type
|
||||
|
||||
s : Str t : Str
|
||||
Str : Type   "foo" : Str    [] : Str     ----------------
|
||||
s ++ t : Str
|
||||
```
|
||||
|
||||
|
||||
#NEW
|
||||
|
||||
==The core type system of concrete syntax: functions and tables==
|
||||
|
||||
```
|
||||
A : Type x : A |- B : Type x : A |- b : B f : (x : A) -> B a : A
|
||||
---------------------------- ---------------------- ------------------------
|
||||
(x : A) -> B : Type    \x -> b : (x : A) -> B         f a : B{x := a}
|
||||
|
||||
|
||||
P : PType   A : Type            t : P => A    p : P
|
||||
-------------------- -----------------
|
||||
P => A : Type t ! p : A
|
||||
|
||||
v_1,...,v_n : A
|
||||
---------------------------------------------- P = {C_1,...,C_n}
|
||||
table {C_1 => v_1 ; ... ; C_n => v_n} : P => A
|
||||
```
|
||||
Pattern matching is treated as an abbreviation for tables. Notice that
|
||||
```
|
||||
case e of {...} == table {...} ! e
|
||||
```
|
||||
|
||||
|
||||
#NEW
|
||||
|
||||
==The core type system of concrete syntax: records==
|
||||
|
||||
```
|
||||
A_1,...,A_n : Type
|
||||
------------------------------------ n >= 0
|
||||
{r_1 : A_1 ; ... ; r_n : A_n} : Type
|
||||
|
||||
|
||||
a_1 : A_1 ... a_n : A_n
|
||||
------------------------------------------------------------
|
||||
{r_1 = a_1 ; ... ; r_n = a_n} : {r_1 : A_1 ; ... ; r_n : A_n}
|
||||
|
||||
|
||||
r : {r_1 : A_1 ; ... ; r_n : A_n}
|
||||
----------------------------------- i = 1,...,n
|
||||
r.r_i : A_i
|
||||
```
|
||||
Subtyping: if ``r : R`` then ``r : R ** {r : A}``
|
||||
|
||||
|
||||
|
||||
#NEW
|
||||
|
||||
==Computation rules==
|
||||
|
||||
```
|
||||
(\x -> b) a = b{x := a}
|
||||
|
||||
(table {C_1 => v_1 ; ... ; C_n => v_n} : P => A) ! C_i = v_i
|
||||
|
||||
{r_1 = a_1 ; ... ; r_n = a_n}.r_i = a_i
|
||||
```
|
||||
|
||||
|
||||
|
||||
#NEW
|
||||
|
||||
==Canonical GF==
|
||||
|
||||
Concrete syntax type system:
|
||||
```
|
||||
A_1 : Type ... A_n : Type
|
||||
Str : Type Int : Type ------------------------- $i : A
|
||||
[A_1, ..., A_n] : Type
|
||||
|
||||
|
||||
a_1 : A_1 ... a_n : A_n t : [A_1, ..., A_n]
|
||||
--------------------------------- ------------------- i = 1,..,n
|
||||
[a_1, ..., a_n] : [A_1, ..., A_n] t ! i : A_i
|
||||
```
|
||||
Tuples represent both records and tables.
|
||||
|
||||
There are no functions.
|
||||
|
||||
Linearization:
|
||||
```
|
||||
lin f = f*
|
||||
|
||||
(f a_1 ... a_n)* = f*{$1 = a_1*, ..., $n = a_n*}
|
||||
```
|
||||
|
||||
|
||||
#NEW
|
||||
|
||||
==The compilation task, again==
|
||||
|
||||
1. From a GF source grammar, derive a canonical GF grammar.
|
||||
|
||||
2. From the canonical GF grammar derive an MPCFG grammar
|
||||
|
||||
The canonical GF grammar can be used for linearization, with
|
||||
linear time complexity (w.r.t. the size of the tree).
|
||||
|
||||
The MPCFG grammar can be used for parsing, with (unbounded)
|
||||
polynomial time complexity (w.r.t. the size of the string).
|
||||
|
||||
For these target formats, we have also built interpreters in
|
||||
different programming languages (C, C++, Haskell, Java, Prolog).
|
||||
|
||||
Moreover, we generate supplementary formats such as grammars
|
||||
required by various speech recognition systems.
|
||||
|
||||
|
||||
#NEW
|
||||
|
||||
==An overview of compilation phases==
|
||||
|
||||
Legend:
|
||||
- ellipse node: representation saved in a file
|
||||
- plain text node: internal representation
|
||||
- solid arrow or ellipse: essential phase or format
|
||||
- dashed arrow or ellipse: optional phase or format
|
||||
- arrow label: the module implementing the phase
|
||||
|
||||
|
||||
[gf-compiler.png]
|
||||
|
||||
|
||||
#NEW
|
||||
|
||||
==Using the compiler==
|
||||
|
||||
Batch mode (cf. GHC).
|
||||
|
||||
Interactive mode, building the grammar incrementally from
|
||||
different files, with the possibility of testing them
|
||||
(cf. GHCI).
|
||||
|
||||
The interactive mode was first, built on the model of ALF-2
|
||||
(L. Magnusson), and there was no file output of compiled
|
||||
grammars.
|
||||
|
||||
|
||||
#NEW
|
||||
|
||||
==Modules and separate compilation==
|
||||
|
||||
The above diagram shows what happens to each module.
|
||||
(But not quite, since some of the back-end formats must be
|
||||
built for sets of modules: GFCC and the parser formats.)
|
||||
|
||||
When the grammar compiler is called, it has a main module as its
|
||||
argument. It then builds recursively a dependency graph with all
|
||||
the other modules, and decides which ones must be recompiled.
|
||||
The behaviour is rather similar to GHC.
|
||||
|
||||
Separate compilation is //extremely important// when developing
|
||||
big grammars, especially when using grammar libraries. Example: compiling
|
||||
the GF resource grammar library takes 5 minutes, whereas reading
|
||||
in the compiled image takes 10 seconds.
|
||||
|
||||
|
||||
#NEW
|
||||
|
||||
==Module dependencies and recompilation==
|
||||
|
||||
(For later use, not for the Proglog talk)
|
||||
|
||||
For each module M, there are 3 kinds of files:
|
||||
- M.gf, source file
|
||||
- M.gfc, compiled file ("object file")
|
||||
- M.gfr, type-checked and optimized source file (for resource modules only)
|
||||
|
||||
|
||||
The compiler reads gf files and writes gfc files (and gfr files if appropriate)
|
||||
|
||||
The Main module is the one used as argument when calling GF.
|
||||
|
||||
A module M (immediately) depends on the module K, if either
|
||||
- M is a concrete of K
|
||||
- M is an instance of K
|
||||
- M extends K
|
||||
- M opens K
|
||||
- M is a completion of K with something
|
||||
- M is a completion of some module with K instantiated with something
|
||||
|
||||
|
||||
A module M (transitively) depends on the module K, if either
|
||||
- M immediately depends on K
|
||||
- M depends on some L such that L immediately depends on K
|
||||
|
||||
|
||||
Immediate dependence is readable from the module header without parsing
|
||||
the whole module.
|
||||
|
||||
The compiler reads recursively the headers of all modules that Main depends on.
|
||||
|
||||
These modules are arranged in a dependency graph, which is checked to be acyclic.
|
||||
|
||||
To decide whether a module M has to be compiled, do:
|
||||
+ Get the time stamps t() of M.gf and M.gfc (if a file doesn't exist, its
|
||||
time is minus infinity).
|
||||
+ If t(M.gf) > t(M.gfc), M must be compiled.
|
||||
+ If M depends on K and K must be compiled, then M must be compiled.
|
||||
+ If M depends on K and t(K.gf) > t(M.gfc), then M must be compiled.
|
||||
|
||||
|
||||
Decorate the dependency graph by information on whether the gf or the gfc (and gfr)
|
||||
format is to be read.
|
||||
|
||||
Topologically sort the decorated graph, and read each file in the chosen format.
|
||||
|
||||
The gfr file is generated for these module types only:
|
||||
- resource
|
||||
- instance
|
||||
|
||||
|
||||
When reading K.gfc, also K.gfr is read if some M depending on K has to be compiled.
|
||||
In other cases, it is enough to read K.gfc.
|
||||
|
||||
In an interactive GF session, some modules may be in memory already.
|
||||
When read to the memory, each module M is given time stamp t(M.m).
|
||||
The additional rule now is:
|
||||
- If M.gfc is to be read, and t(M.m) > t(M.gfc), don't read M.gfc.
|
||||
|
||||
|
||||
|
||||
|
||||
#NEW
|
||||
|
||||
==Techniques used==
|
||||
|
||||
The compiler is written in Haskell, with some C foreign function calls
|
||||
in the interactive version (readline, killing threads).
|
||||
|
||||
BNFC is used for generating both the parsers and printers.
|
||||
This has helped to make the formats portable.
|
||||
|
||||
"Almost compositional functions" (``composOp``) are used in
|
||||
many compiler passes, making them easier to write and understand.
|
||||
A ``grep`` on the sources reveals 40 uses (outside the definition
|
||||
of ``composOp`` itself).
|
||||
|
||||
The key algorithmic ideas are
|
||||
- type-driven partial evaluation in GF-to-GFC generation
|
||||
- common subexpression elimination as back-end optimization
|
||||
- some ideas in GFC-to-MCFG encoding
|
||||
|
||||
|
||||
#NEW
|
||||
|
||||
==Type-driven partial evaluation==
|
||||
|
||||
Each abstract syntax category in GF has a corresponding linearization type:
|
||||
```
|
||||
cat C
|
||||
lincat C = T
|
||||
```
|
||||
The general form of a GF rule pair is
|
||||
```
|
||||
fun f : C1 -> ... -> Cn -> C
|
||||
lin f = t
|
||||
```
|
||||
with the typing condition following the ``lincat`` definitions
|
||||
```
|
||||
t : T1 -> ... -> Tn -> T
|
||||
```
|
||||
The term ``t`` is in general built by using abstraction methods such
|
||||
as pattern matching, higher-order functions, local definitions,
|
||||
and library functions.
|
||||
|
||||
The compilation technique proceeds as follows:
|
||||
- use eta-expansion on ``t`` to determine the canonical form of the term
|
||||
```
|
||||
\ $C1, ...., $Cn -> (t $C1 .... $Cn)
|
||||
```
|
||||
with unique variables ``$C1 .... $Cn`` for the arguments; repeat this
|
||||
inside the term for records and tables
|
||||
- evaluate the resulting term using the computation rules of GF
|
||||
- what remains is a canonical term with ``$C1 .... $Cn`` the only
|
||||
variables (the run-time input of the linearization function)
|
||||
|
||||
|
||||
#NEW
|
||||
|
||||
==Eta-expanding records and tables==
|
||||
|
||||
For records that are valid via subtyping, eta expansion
|
||||
eliminates superfluous fields:
|
||||
```
|
||||
{r1 = t1 ; r2 = t2} : {r1 : T1} ----> {r1 = t1}
|
||||
```
|
||||
For tables, the effect is always expansion, since
|
||||
pattern matching can be used to represent tables
|
||||
compactly:
|
||||
```
|
||||
table {n => "fish"} : Number => Str --->
|
||||
|
||||
table {
|
||||
Sg => "fish" ;
|
||||
Pl => "fish"
|
||||
}
|
||||
```
|
||||
This can be helped by back-end optimizations (see below).
|
||||
|
||||
|
||||
#NEW
|
||||
|
||||
==Eliminating functions==
|
||||
|
||||
"Everything is finite": parameter types, records, tables;
|
||||
finite number of string tokens per grammar.
|
||||
|
||||
But "infinite types" such as function types are useful when
|
||||
writing grammars, to enable abstractions.
|
||||
|
||||
Since function types do not appear in linearization types,
|
||||
we want functions to be eliminated from linearization terms.
|
||||
|
||||
This is similar to the **subformula property** in logic.
|
||||
Also the main problem is similar: function depending on
|
||||
a run-time variable,
|
||||
```
|
||||
(table {P => f ; Q => g} ! x) a
|
||||
```
|
||||
This is not a redex, but we can make it closer to one by moving
|
||||
the application inside the table,
|
||||
```
|
||||
table {P => f a ; Q => g a} ! x
|
||||
```
|
||||
This transformation is the same as Prawitz's (1965) elimination
|
||||
of maximal segments in natural deduction:
|
||||
```
|
||||
A B
|
||||
C -> D C C -> D C
|
||||
A B --------- ---------
|
||||
A v B C -> D C -> D A v B D D
|
||||
--------------------- ===> -------------------------
|
||||
C -> D C D
|
||||
--------------------
|
||||
D
|
||||
```
|
||||
|
||||
|
||||
|
||||
#NEW
|
||||
|
||||
==Size effects of partial evaluation==
|
||||
|
||||
Irrelevant table branches are thrown away, which can reduce the size.
|
||||
|
||||
But, since tables are expanded and auxiliary functions are inlined,
|
||||
the size can grow exponentially.
|
||||
|
||||
How can we keep the first property and eliminate the second?
|
||||
|
||||
|
||||
#NEW
|
||||
|
||||
==Parametrization of tables==
|
||||
|
||||
Algorithm: for each branch in a table, consider replacing the
|
||||
argument by a variable:
|
||||
```
|
||||
table { table {
|
||||
P => t ; ---> x => t[P->x] ;
|
||||
Q => u x => u[Q->x]
|
||||
} }
|
||||
```
|
||||
If the resulting branches are all equal, you can replace the table
|
||||
by a lambda abstract
|
||||
```
|
||||
\\x => t[P->x]
|
||||
```
|
||||
If each created variable ``x`` is unique in the grammar, computation
|
||||
with the lambda abstract is efficient.
|
||||
|
||||
|
||||
|
||||
#NEW
|
||||
|
||||
==Course-of-values tables==
|
||||
|
||||
By maintaining a canonical order of parameters in a type, we can
|
||||
eliminate the left hand sides of branches.
|
||||
```
|
||||
table { table T [
|
||||
P => t ; ---> t ;
|
||||
Q => u u
|
||||
} ]
|
||||
```
|
||||
The treatment is similar to ``Enum`` instances in Haskell.
|
||||
|
||||
In the end, all parameter types can be translated to
|
||||
initial segments of integers.
|
||||
|
||||
|
||||
#NEW
|
||||
|
||||
==Common subexpression elimination==
|
||||
|
||||
Algorithm:
|
||||
+ Go through all terms and subterms in a module, creating
|
||||
a symbol table mapping terms to the number of occurrences.
|
||||
+ For each subterm appearing at least twice, create a fresh
|
||||
constant defined as that subterm.
|
||||
+ Go through all rules (incl. rules for the new constants),
|
||||
replacing largest possible subterms with such new constants.
|
||||
|
||||
|
||||
This algorithm, in a way, creates the strongest possible abstractions.
|
||||
|
||||
In general, the new constants have open terms as definitions.
|
||||
But since all variables (and constants) are unique, they can
|
||||
be computed by simple replacement.
|
||||
|
||||
|
||||
|
||||
#NEW
|
||||
|
||||
==Size effects of optimizations==
|
||||
|
||||
Example: the German resource grammar
|
||||
``LangGer``
|
||||
|
||||
|| optimization | lines | characters | size % | blow-up |
|
||||
| none | 5394 | 3208435 | 100 | 25 |
|
||||
| all | 5394 | 750277 | 23 | 6 |
|
||||
| none_subs | 5772 | 1290866 | 40 | 10 |
|
||||
| all_subs | 5644 | 414119 | 13 | 3 |
|
||||
| gfcc | 3279 | 190004 | 6 | 1.5 |
|
||||
| gf source | 3976 | 121939 | 4 | 1 |
|
||||
|
||||
|
||||
Optimization "all" means parametrization + course-of-values.
|
||||
|
||||
The source code size is an estimate, since it includes
|
||||
potentially irrelevant library modules, and comments.
|
||||
|
||||
The GFCC format is not reusable in separate compilation.
|
||||
|
||||
|
||||
|
||||
#NEW
|
||||
|
||||
==The shared prefix optimization==
|
||||
|
||||
This is currently performed in GFCC only.
|
||||
|
||||
The idea works for languages that have a rich morphology
|
||||
based on suffixes. Then we can replace a course of values
|
||||
with a pair of a prefix and a suffix set:
|
||||
```
|
||||
["apa", "apan", "apor", "aporna"] --->
|
||||
("ap" + ["a", "an", "or", "orna"])
|
||||
```
|
||||
The real gain comes via common subexpression elimination:
|
||||
```
|
||||
_34 = ["a", "an", "or", "orna"]
|
||||
apa = ("ap" + _34)
|
||||
blomma = ("blomm" + _34)
|
||||
flicka = ("flick" + _34)
|
||||
```
|
||||
Notice that it now matters a lot how grammars are written.
|
||||
For instance, if German verbs are treated as a one-dimensional
|
||||
table,
|
||||
```
|
||||
["lieben", "liebe", "liebst", ...., "geliebt", "geliebter",...]
|
||||
```
|
||||
no shared prefix optimization is possible. A better form is
|
||||
separate tables for non-"ge" and "ge" forms:
|
||||
```
|
||||
[["lieben", "liebe", "liebst", ....], ["geliebt", "geliebter",...]]
|
||||
```
|
||||
|
||||
|
||||
#NEW
|
||||
|
||||
==Reuse of grammars as libraries==
|
||||
|
||||
The idea of resource grammars: take care of all aspects of
|
||||
surface grammaticality (inflection, agreement, word order).
|
||||
|
||||
Reuse in application grammar: via translations
|
||||
```
|
||||
cat C ---> oper C : Type = T
|
||||
lincat C = T
|
||||
|
||||
fun f : A ---> oper f : A* = t
|
||||
lin f = t
|
||||
```
|
||||
The user only needs to know the type signatures (abstract syntax).
|
||||
|
||||
However, this does not quite guarantee grammaticality, because
|
||||
different categories can have the same lincat:
|
||||
```
|
||||
lincat Conj = {s : Str}
|
||||
lincat Adv = {s : Str}
|
||||
```
|
||||
Thus someone may by accident use "and" as an adverb!
|
||||
|
||||
|
||||
#NEW
|
||||
|
||||
==Forcing the type checker to act as a grammar checker==
|
||||
|
||||
We just have to make linearization types unique for each category.
|
||||
|
||||
The technique is reminiscent of Haskell's ``newtype`` but uses
|
||||
records instead: we add **lock fields** e.g.
|
||||
```
|
||||
lincat Conj = {s : Str ; lock_Conj : {}}
|
||||
lincat Adv = {s : Str ; lock_Adv : {}}
|
||||
```
|
||||
Thanks to record subtyping, the translation is simple:
|
||||
```
|
||||
fun f : C1 -> ... -> Cn -> C
|
||||
lin f = t
|
||||
|
||||
--->
|
||||
|
||||
oper f : C1* -> ... -> Cn* -> C* =
|
||||
\x1,...,xn -> (t x1 ... xn) ** {lock_C = {}}
|
||||
```
|
||||
|
||||
#NEW
|
||||
|
||||
==Things to do==
|
||||
|
||||
Better compression of gfc file format.
|
||||
|
||||
Type checking of dependent-type pattern matching in abstract syntax.
|
||||
|
||||
Compilation-related modules that need rewriting
|
||||
- ``ReadFiles``: clarify the logic of dependencies
|
||||
- ``Compile``: clarify the logic of what to do with each module
|
||||
- ``Compute``: make the evaluation more efficient
|
||||
- ``Parsing/*``, ``OldParsing/*``, ``Conversion/*``: reduce the number
|
||||
of parser formats and algorithms
|
||||
@@ -1,79 +0,0 @@
|
||||
graph{
|
||||
|
||||
size = "7,7" ;
|
||||
|
||||
overlap = scale ;
|
||||
|
||||
"Abs" [label = "Abstract Syntax", style = "solid", shape = "rectangle"] ;
|
||||
|
||||
"1" [label = "Bulgarian", style = "solid", shape = "ellipse", color = "green"] ;
|
||||
"1" -- "Abs" [style = "solid"];
|
||||
|
||||
"2" [label = "Czech", style = "solid", shape = "ellipse", color = "red"] ;
|
||||
"2" -- "Abs" [style = "solid"];
|
||||
|
||||
"3" [label = "Danish", style = "solid", shape = "ellipse", color = "green"] ;
|
||||
"3" -- "Abs" [style = "solid"];
|
||||
|
||||
"4" [label = "German", style = "solid", shape = "ellipse", color = "green"] ;
|
||||
"4" -- "Abs" [style = "solid"];
|
||||
|
||||
"5" [label = "Estonian", style = "solid", shape = "ellipse", color = "red"] ;
|
||||
"5" -- "Abs" [style = "solid"];
|
||||
|
||||
"6" [label = "Greek", style = "solid", shape = "ellipse", color = "red"] ;
|
||||
"6" -- "Abs" [style = "solid"];
|
||||
|
||||
"7" [label = "English", style = "solid", shape = "ellipse", color = "green"] ;
|
||||
"7" -- "Abs" [style = "solid"];
|
||||
|
||||
"8" [label = "Spanish", style = "solid", shape = "ellipse", color = "green"] ;
|
||||
"8" -- "Abs" [style = "solid"];
|
||||
|
||||
"9" [label = "French", style = "solid", shape = "ellipse", color = "green"] ;
|
||||
"9" -- "Abs" [style = "solid"];
|
||||
|
||||
"10" [label = "Italian", style = "solid", shape = "ellipse", color = "green"] ;
|
||||
"10" -- "Abs" [style = "solid"];
|
||||
|
||||
"11" [label = "Latvian", style = "solid", shape = "ellipse", color = "red"] ;
|
||||
"11" -- "Abs" [style = "solid"];
|
||||
|
||||
"12" [label = "Lithuanian", style = "solid", shape = "ellipse", color = "red"] ;
|
||||
"Abs" -- "12" [style = "solid"];
|
||||
|
||||
"13" [label = "Irish", style = "solid", shape = "ellipse", color = "red"] ;
|
||||
"Abs" -- "13" [style = "solid"];
|
||||
|
||||
"14" [label = "Hungarian", style = "solid", shape = "ellipse", color = "red"] ;
|
||||
"Abs" -- "14" [style = "solid"];
|
||||
|
||||
"15" [label = "Maltese", style = "solid", shape = "ellipse", color = "red"] ;
|
||||
"Abs" -- "15" [style = "solid"];
|
||||
|
||||
"16" [label = "Dutch", style = "solid", shape = "ellipse", color = "red"] ;
|
||||
"Abs" -- "16" [style = "solid"];
|
||||
|
||||
"17" [label = "Polish", style = "solid", shape = "ellipse", color = "red"] ;
|
||||
"Abs" -- "17" [style = "solid"];
|
||||
|
||||
"18" [label = "Portuguese", style = "solid", shape = "ellipse", color = "red"] ;
|
||||
"Abs" -- "18" [style = "solid"];
|
||||
|
||||
"19" [label = "Slovak", style = "solid", shape = "ellipse", color = "red"] ;
|
||||
"Abs" -- "19" [style = "solid"];
|
||||
|
||||
"20" [label = "Slovene", style = "solid", shape = "ellipse", color = "red"] ;
|
||||
"Abs" -- "20" [style = "solid"];
|
||||
|
||||
"21" [label = "Romanian", style = "solid", shape = "ellipse", color = "red"] ;
|
||||
"Abs" -- "21" [style = "solid"];
|
||||
|
||||
"22" [label = "Finnish", style = "solid", shape = "ellipse", color = "green"] ;
|
||||
"Abs" -- "22" [style = "solid"];
|
||||
|
||||
"23" [label = "Swedish", style = "solid", shape = "ellipse", color = "green"] ;
|
||||
"Abs" -- "23" [style = "solid"];
|
||||
|
||||
|
||||
}
|
||||
BIN
doc/eu-langs.png
|
Before Width: | Height: | Size: 84 KiB |
|
Before Width: | Height: | Size: 22 KiB |
BIN
doc/food1.png
|
Before Width: | Height: | Size: 22 KiB |
BIN
doc/food2.png
|
Before Width: | Height: | Size: 31 KiB |
@@ -1,88 +0,0 @@
|
||||
digraph {
|
||||
|
||||
gfe [label = "file.gfe", style = "dashed", shape = "ellipse"];
|
||||
gfe -> gf1 [label = " MkConcrete", style = "dashed"];
|
||||
|
||||
gf1 [label = "file.gf", style = "solid", shape = "ellipse"];
|
||||
gf1 -> gf2 [label = " LexGF", style = "solid"];
|
||||
|
||||
gf2 [label = "token list", style = "solid", shape = "plaintext"];
|
||||
gf2 -> gf3 [label = " ParGF", style = "solid"];
|
||||
|
||||
gf3 [label = "source tree", style = "solid", shape = "plaintext"];
|
||||
gf3 -> gf4 [label = " SourceToGrammar", style = "solid"];
|
||||
|
||||
cf [label = "file.cf", style = "dashed", shape = "ellipse"];
|
||||
cf -> gf4 [label = " CF.PPrCF", style = "dashed"];
|
||||
|
||||
ebnf [label = "file.ebnf", style = "dashed", shape = "ellipse"];
|
||||
ebnf -> gf4 [label = " CF.EBNF", style = "dashed"];
|
||||
|
||||
|
||||
gf4 [label = "GF tree", style = "solid", shape = "plaintext"];
|
||||
gf4 -> gf5 [label = " Extend", style = "solid"];
|
||||
|
||||
gf5 [label = "inheritance-linked GF tree", style = "solid", shape = "plaintext"];
|
||||
gf5 -> gf6 [label = " Rename", style = "solid"];
|
||||
|
||||
gf6 [label = "name-resolved GF tree", style = "solid", shape = "plaintext"];
|
||||
gf6 -> gf7 [label = " CheckGrammar", style = "solid"];
|
||||
|
||||
gf7 [label = "type-annotated GF tree", style = "solid", shape = "plaintext"];
|
||||
gf7 -> gf8 [label = " Optimize", style = "solid"];
|
||||
|
||||
gf8 [label = "optimized GF tree", style = "solid", shape = "plaintext"];
|
||||
gf8 -> gf9 [label = " GrammarToCanon", style = "solid"];
|
||||
|
||||
gf9 [label = "GFC tree", style = "solid", shape = "plaintext"];
|
||||
gf9 -> gfc [label = " BackOpt", style = "solid"];
|
||||
|
||||
gfc [label = "optimized GFC tree", style = "solid", shape = "box"];
|
||||
gfc -> gf11 [label = " PrintGFC", style = "solid"];
|
||||
|
||||
gf11 [label = "file.gfc", style = "solid", shape = "ellipse"];
|
||||
|
||||
|
||||
gfcc [label = "file.gfcc", style = "solid", shape = "ellipse"];
|
||||
gfc -> gfcc [label = " CanonToGFCC", style = "solid"];
|
||||
|
||||
mcfg [label = "file.gfcm", style = "dashed", shape = "ellipse"];
|
||||
gfc -> mcfg [label = " PrintGFC", style = "dashed"];
|
||||
|
||||
bnf [label = "file.cf", style = "dashed", shape = "ellipse"];
|
||||
gfc -> bnf [label = " CF.PrLBNF", style = "dashed"];
|
||||
|
||||
happy [label = "file.y (Happy)", style = "dashed", shape = "ellipse"];
|
||||
bnf -> happy [label = " bnfc", style = "dashed"];
|
||||
|
||||
bison [label = "file.y (Bison)", style = "dashed", shape = "ellipse"];
|
||||
bnf -> bison [label = " bnfc", style = "dashed"];
|
||||
|
||||
cup [label = "parser.java (CUP)", style = "dashed", shape = "ellipse"];
|
||||
bnf -> cup [label = " bnfc", style = "dashed"];
|
||||
|
||||
xml [label = "file.dtd (XML)", style = "dashed", shape = "ellipse"];
|
||||
bnf -> xml [label = " bnfc", style = "dashed"];
|
||||
|
||||
cfg [label = "CFG tree", style = "solid", shape = "plaintext"];
|
||||
gfc -> cfg [label = " Conversions.GFC", style = "dashed"];
|
||||
|
||||
cfgm [label = "file.cfgm", style = "dashed", shape = "ellipse"];
|
||||
cfg -> cfgm [label = " Conversions.GFC", style = "dashed"];
|
||||
|
||||
srg [label = "Non-LR CFG", style = "solid", shape = "plaintext"];
|
||||
cfg -> srg [label = " Speech.SRG", style = "dashed"];
|
||||
|
||||
gsl [label = "file.gsl", style = "dashed", shape = "ellipse"];
|
||||
srg -> gsl [label = " Speech.PrGSL", style = "dashed"];
|
||||
|
||||
jsgf [label = "file.jsgf", style = "dashed", shape = "ellipse"];
|
||||
srg -> jsgf [label = " Speech.PrJSGF", style = "dashed"];
|
||||
|
||||
fa [label = "DFA", style = "solid", shape = "plaintext"];
|
||||
cfg -> fa [label = " Speech.CFGToFiniteState", style = "dashed"];
|
||||
|
||||
slf [label = "file.slf", style = "dashed", shape = "ellipse"];
|
||||
fa -> slf [label = " Speech.PrSLF", style = "dashed"];
|
||||
|
||||
}
|
||||
|
Before Width: | Height: | Size: 27 KiB |
@@ -1,350 +0,0 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
||||
<HTML>
|
||||
<HEAD>
|
||||
<META NAME="generator" CONTENT="http://txt2tags.sf.net">
|
||||
<TITLE>A Birds-Eye View of GF as a Grammar Formalism</TITLE>
|
||||
</HEAD><BODY BGCOLOR="white" TEXT="black">
|
||||
<P ALIGN="center"><CENTER><H1>A Birds-Eye View of GF as a Grammar Formalism</H1>
|
||||
<FONT SIZE="4">
|
||||
<I>Author: Aarne Ranta</I><BR>
|
||||
Last update: Thu Feb 2 14:16:01 2006
|
||||
</FONT></CENTER>
|
||||
|
||||
<P></P>
|
||||
<HR NOSHADE SIZE=1>
|
||||
<P></P>
|
||||
<UL>
|
||||
<LI><A HREF="#toc1">GF in a few words</A>
|
||||
<LI><A HREF="#toc2">History of GF</A>
|
||||
<LI><A HREF="#toc3">Some key ingredients of GF in other grammar formalisms</A>
|
||||
<LI><A HREF="#toc4">Examples of descriptions in each formalism</A>
|
||||
<LI><A HREF="#toc5">Lambda terms and records</A>
|
||||
<LI><A HREF="#toc6">The structure of GF formalisms</A>
|
||||
<LI><A HREF="#toc7">The expressivity of GF</A>
|
||||
<LI><A HREF="#toc8">Grammars and parsing</A>
|
||||
<LI><A HREF="#toc9">Grammars as software libraries</A>
|
||||
<LI><A HREF="#toc10">Multilinguality</A>
|
||||
<LI><A HREF="#toc11">Parametrized modules</A>
|
||||
</UL>
|
||||
|
||||
<P></P>
|
||||
<HR NOSHADE SIZE=1>
|
||||
<P></P>
|
||||
<P>
|
||||
<IMG ALIGN="middle" SRC="Logos/gf0.png" BORDER="0" ALT="">
|
||||
</P>
|
||||
<P>
|
||||
<I>Abstract. This document gives a general description of the</I>
|
||||
<I>Grammatical Framework (GF), with comparisons to other grammar</I>
|
||||
<I>formalisms such as CG, ACG, HPSG, and LFG.</I>
|
||||
</P>
|
||||
<P>
|
||||
<!-- NEW -->
|
||||
</P>
|
||||
<A NAME="toc1"></A>
|
||||
<H2>GF in a few words</H2>
|
||||
<P>
|
||||
Grammatical Framework (GF) is a grammar formalism
|
||||
based on <B>constructive type theory</B>.
|
||||
</P>
|
||||
<P>
|
||||
GF makes a distinction between <B>abstract syntax</B> and <B>concrete syntax</B>.
|
||||
</P>
|
||||
<P>
|
||||
The abstract syntax part of GF is a <B>logical framework</B>, with
|
||||
dependent types and higher-order functions.
|
||||
</P>
|
||||
<P>
|
||||
The concrete syntax is a system of <B>records</B> containing strings and features.
|
||||
</P>
|
||||
<P>
|
||||
A GF grammar defines a <B>reversible homomorphism</B> from an abstract syntax to a
|
||||
concrete syntax.
|
||||
</P>
|
||||
<P>
|
||||
A <B>multilingual GF grammar</B> is a set of concrete syntaxes associated with
|
||||
one abstract syntax.
|
||||
</P>
|
||||
<P>
|
||||
GF grammars are written in a high-level <B>functional programming language</B>,
|
||||
which is compiled into a <B>core language</B> (GFC).
|
||||
</P>
|
||||
<P>
|
||||
GF grammars can be used as <B>resources</B>, i.e. as libraries for writing
|
||||
new grammars; these are compiled and optimized by the method of
|
||||
<B>grammar composition</B>.
|
||||
</P>
|
||||
<P>
|
||||
GF has a <B>module system</B> that supports grammar engineering and separate
|
||||
compilation.
|
||||
</P>
|
||||
<P>
|
||||
<!-- NEW -->
|
||||
</P>
|
||||
<A NAME="toc2"></A>
|
||||
<H2>History of GF</H2>
|
||||
<P>
|
||||
1988. Intuitionistic Categorial Grammar; type theory as abstract syntax,
|
||||
playing the role of Montague's analysis trees. Grammars implemented in Prolog.
|
||||
</P>
|
||||
<P>
|
||||
1994. Type-Theoretical Grammar. Abstract syntax organized as a system of
|
||||
combinators. Grammars implemented in ALF.
|
||||
</P>
|
||||
<P>
|
||||
1996. Multilingual Type-Theoretical Grammar. Rules for generating six
|
||||
languages from the same abstract syntax. Grammars implemented in ALF, ML, and
|
||||
Haskell.
|
||||
</P>
|
||||
<P>
|
||||
1998. The first implementation of GF as a language of its own.
|
||||
</P>
|
||||
<P>
|
||||
2000. New version of GF: high-level functional source language, records used
|
||||
for concrete syntax.
|
||||
</P>
|
||||
<P>
|
||||
2003. The module system.
|
||||
</P>
|
||||
<P>
|
||||
2004. Ljunglöf's thesis <I>Expressivity and Complexity of GF</I>.
|
||||
</P>
|
||||
<P>
|
||||
<!-- NEW -->
|
||||
</P>
|
||||
<A NAME="toc3"></A>
|
||||
<H2>Some key ingredients of GF in other grammar formalisms</H2>
|
||||
<UL>
|
||||
<LI>[GF ]: Grammatical Framework
|
||||
<LI>[CG ]: categorial grammar
|
||||
<LI>[ACG ]: abstract categorial grammar
|
||||
<LI>[HPSG ]: head-driven phrase structure grammar
|
||||
<LI>[LFG ]: lexical functional grammar
|
||||
</UL>
|
||||
|
||||
<TABLE CELLPADDING="4" BORDER="1">
|
||||
<TR>
|
||||
<TD ALIGN="center">/</TD>
|
||||
<TD>GF</TD>
|
||||
<TD>ACG</TD>
|
||||
<TD>LFG</TD>
|
||||
<TD>HPSG</TD>
|
||||
<TD>CG</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>abstract vs concrete syntax</TD>
|
||||
<TD>X</TD>
|
||||
<TD>X</TD>
|
||||
<TD>?</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>type theory</TD>
|
||||
<TD>X</TD>
|
||||
<TD>X</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>X</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>records and features</TD>
|
||||
<TD>X</TD>
|
||||
<TD>-</TD>
|
||||
<TD>X</TD>
|
||||
<TD>X</TD>
|
||||
<TD>-</TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
|
||||
<P></P>
|
||||
<P>
|
||||
<!-- NEW -->
|
||||
</P>
|
||||
<A NAME="toc4"></A>
|
||||
<H2>Examples of descriptions in each formalism</H2>
|
||||
<P>
|
||||
To be written...
|
||||
</P>
|
||||
<P>
|
||||
<!-- NEW -->
|
||||
</P>
|
||||
<A NAME="toc5"></A>
|
||||
<H2>Lambda terms and records</H2>
|
||||
<P>
|
||||
In CS, abstract syntax is trees and concrete syntax is strings.
|
||||
This works more or less for programming languages.
|
||||
</P>
|
||||
<P>
|
||||
In CG, all syntax is lambda terms.
|
||||
</P>
|
||||
<P>
|
||||
In Montague grammar, abstract syntax is lambda terms and
|
||||
concrete syntax is trees. Abstract syntax as lambda terms
|
||||
can be considered well-established.
|
||||
</P>
|
||||
<P>
|
||||
In PATR and HPSG, concrete syntax is records. This can be considered
|
||||
well-established for natural languages.
|
||||
</P>
|
||||
<P>
|
||||
In ACG, both are lambda terms. This is more general than GF,
|
||||
but reversibility requires linearity restriction, which can be
|
||||
unnatural for grammar writing.
|
||||
</P>
|
||||
<P>
|
||||
In GF, linearization from lambda terms to records is reversible,
|
||||
and grammar writing is not restricted to linear terms.
|
||||
</P>
|
||||
<P>
|
||||
Grammar composition in ACG is just function composition. In GF,
|
||||
it is more restricted...
|
||||
</P>
|
||||
<P>
|
||||
<!-- NEW -->
|
||||
</P>
|
||||
<A NAME="toc6"></A>
|
||||
<H2>The structure of GF formalisms</H2>
|
||||
<P>
|
||||
The following diagram (to be drawn properly!) describes the
|
||||
levels.
|
||||
</P>
|
||||
<PRE>
|
||||
| programming language design
|
||||
V
|
||||
GF source language
|
||||
|
|
||||
| type-directed partial evaluation
|
||||
V
|
||||
GFC assembly language
|
||||
|
|
||||
| Ljunglöf's translation
|
||||
V
|
||||
MCFG parser
|
||||
</PRE>
|
||||
<P>
|
||||
The last two phases are nontrivial mathematical properties.
|
||||
</P>
|
||||
<P>
|
||||
In most grammar formalisms, grammarians have to work on the GFC
|
||||
(or MCFG) level.
|
||||
</P>
|
||||
<P>
|
||||
Maybe they use macros - they are therefore like macro assemblers. But there
|
||||
are no separately compiled library modules, no type checking, etc.
|
||||
</P>
|
||||
<P>
|
||||
<!-- NEW -->
|
||||
</P>
|
||||
<A NAME="toc7"></A>
|
||||
<H2>The expressivity of GF</H2>
|
||||
<P>
|
||||
Parsing complexity is the same as MCFG: polynomial, with
|
||||
unrestricted exponent depending on grammar.
|
||||
This is between TAG and HPSG.
|
||||
</P>
|
||||
<P>
|
||||
If semantic well-formedness (type theory) is taken into account,
|
||||
then arbitrary logic can be expressed. The well-formedness of
|
||||
abstract syntax is decidable, but the well-formedness of a
|
||||
concrete-syntax string can require an arbitrary proof construction
|
||||
and is therefore undecidable.
|
||||
</P>
|
||||
<P>
|
||||
Separability between AS and CS: like TAG (Tree Adjoining Grammar), GF
|
||||
has the goal of assigning intended trees for strings. This is
|
||||
generalized to shared trees for different languages.
|
||||
</P>
|
||||
<P>
|
||||
The high-level language strives after the properties of
|
||||
writability and readability (programming language notions).
|
||||
</P>
|
||||
<P>
|
||||
<!-- NEW -->
|
||||
</P>
|
||||
<A NAME="toc8"></A>
|
||||
<H2>Grammars and parsing</H2>
|
||||
<P>
|
||||
In many projects, a grammar is just seen as a <B>declarative parsing program</B>.
|
||||
</P>
|
||||
<P>
|
||||
For GF, a grammar is primarily the <B>definition of a language</B>.
|
||||
</P>
|
||||
<P>
|
||||
Detaching grammars from parsers is a good idea, giving
|
||||
</P>
|
||||
<UL>
|
||||
<LI>more efficient and robust parsing (statistical etc)
|
||||
<LI>cleaner grammars
|
||||
</UL>
|
||||
|
||||
<P>
|
||||
Separating abstract from concrete syntax is a prerequisite for this:
|
||||
we want parsers to return abstract syntax objects, and these must exist
|
||||
independently of parse trees.
|
||||
</P>
|
||||
<P>
|
||||
A possible radical approach to parsing:
|
||||
use a grammar to generate a treebank and machine-learn
|
||||
a statistical parser from this.
|
||||
</P>
|
||||
<P>
|
||||
Comparison: Steedman in CCG has done something like this.
|
||||
</P>
|
||||
<P>
|
||||
<!-- NEW -->
|
||||
</P>
|
||||
<A NAME="toc9"></A>
|
||||
<H2>Grammars as software libraries</H2>
|
||||
<P>
|
||||
Reuse for different purposes.
|
||||
</P>
|
||||
<P>
|
||||
Grammar composition.
|
||||
</P>
|
||||
<P>
|
||||
<!-- NEW -->
|
||||
</P>
|
||||
<A NAME="toc10"></A>
|
||||
<H2>Multilinguality</H2>
|
||||
<P>
|
||||
In <B>application grammars</B>, the AS is a semantic
|
||||
model, and a CS covers domain terminology and idioms.
|
||||
</P>
|
||||
<P>
|
||||
This can give publication-quality translation on
|
||||
limited domains (e.g. the WebALT project).
|
||||
</P>
|
||||
<P>
|
||||
Resource grammars with grammar composition lead to
|
||||
<B>compile-time transfer</B>.
|
||||
</P>
|
||||
<P>
|
||||
When is <B>run-time transfer</B> necessary?
|
||||
</P>
|
||||
<P>
|
||||
Cf. CLE (Core Language Engine).
|
||||
</P>
|
||||
<P>
|
||||
<!-- NEW -->
|
||||
</P>
|
||||
<A NAME="toc11"></A>
|
||||
<H2>Parametrized modules</H2>
|
||||
<P>
|
||||
This notion comes from the ML language in the 1980's.
|
||||
</P>
|
||||
<P>
|
||||
It can be used for sharing even more code between languages
|
||||
than their AS.
|
||||
</P>
|
||||
<P>
|
||||
Especially, for related languages (Scandinavian, Romance).
|
||||
</P>
|
||||
<P>
|
||||
Cf. grammar porting in CLE: what they do with untyped
|
||||
macro packages GF does with typable interfaces.
|
||||
</P>
|
||||
|
||||
<!-- html code generated by txt2tags 2.0 (http://txt2tags.sf.net) -->
|
||||
<!-- cmdline: txt2tags -thtml -\-toc gf-formalism.txt -->
|
||||
</BODY></HTML>
|
||||
@@ -1,279 +0,0 @@
|
||||
A Birds-Eye View of GF as a Grammar Formalism
|
||||
Author: Aarne Ranta
|
||||
Last update: %%date(%c)
|
||||
|
||||
% NOTE: this is a txt2tags file.
|
||||
% Create an html file from this file using:
|
||||
% txt2tags -thtml --toc gf-formalism.txt
|
||||
|
||||
%!target:html
|
||||
|
||||
%!postproc(html): #NEW <!-- NEW -->
|
||||
|
||||
[Logos/gf0.png]
|
||||
|
||||
//Abstract. This document gives a general description of the//
|
||||
//Grammatical Framework (GF), with comparisons to other grammar//
|
||||
//formalisms such as CG, ACG, HPSG, and LFG.//
|
||||
|
||||
|
||||
#NEW
|
||||
|
||||
==Logical Frameworks and Grammar Formalisms==
|
||||
|
||||
Logic - formalization of mathematics (mathematical language?)
|
||||
|
||||
Linguistics - formalization of natural language
|
||||
|
||||
Since math lang is a subset, we can expect similarities.
|
||||
|
||||
But in natural language we have
|
||||
- masses of empirical data
|
||||
- no right of reform
|
||||
|
||||
|
||||
|
||||
#NEW
|
||||
|
||||
==High-level programming==
|
||||
|
||||
We have to write a lot of program code when formalizing language.
|
||||
|
||||
We need a language with proper abstractions.
|
||||
|
||||
Cf. Paul Graham on Prolog: very high-level, but wrong abstractions.
|
||||
|
||||
Typed functional languages work well in maths.
|
||||
|
||||
We have developed one for linguistics
|
||||
- some extra constructs, e.g. inflection tables
|
||||
- constraint of reversibility (nontrivial math problem)
|
||||
|
||||
|
||||
Writing a grammar of e.g. French clitics should not be a topic
|
||||
on which one can write a paper - it should be easy to render in code
|
||||
the known facts about languages!
|
||||
|
||||
|
||||
|
||||
#NEW
|
||||
|
||||
==GF in a few words==
|
||||
|
||||
Grammatical Framework (GF) is a grammar formalism
|
||||
based on **constructive type theory**.
|
||||
|
||||
GF makes a distinction between **abstract syntax** and **concrete syntax**.
|
||||
|
||||
The abstract syntax part of GF is a **logical framework**, with
|
||||
dependent types and higher-order functions.
|
||||
|
||||
The concrete syntax is a system of **records** containing strings and features.
|
||||
|
||||
A GF grammar defines a **reversible homomorphism** from an abstract syntax to a
|
||||
concrete syntax.
|
||||
|
||||
A **multilingual GF grammar** is a set of concrete syntaxes associated with
|
||||
one abstract syntax.
|
||||
|
||||
GF grammars are written in a high-level **functional programming language**,
|
||||
which is compiled into a **core language** (GFC).
|
||||
|
||||
GF grammars can be used as **resources**, i.e. as libraries for writing
|
||||
new grammars; these are compiled and optimized by the method of
|
||||
**grammar composition**.
|
||||
|
||||
GF has a **module system** that supports grammar engineering and separate
|
||||
compilation.
|
||||
|
||||
|
||||
#NEW
|
||||
|
||||
==History of GF==
|
||||
|
||||
1988. Intuitionistic Categorial Grammar; type theory as abstract syntax,
|
||||
playing the role of Montague's analysis trees. Grammars implemented in Prolog.
|
||||
|
||||
1994. Type-Theoretical Grammar. Abstract syntax organized as a system of
|
||||
combinators. Grammars implemented in ALF.
|
||||
|
||||
1996. Multilingual Type-Theoretical Grammar. Rules for generating six
|
||||
languages from the same abstract syntax. Grammars implemented in ALF, ML, and
|
||||
Haskell.
|
||||
|
||||
1998. The first implementation of GF as a language of its own.
|
||||
|
||||
2000. New version of GF: high-level functional source language, records used
|
||||
for concrete syntax.
|
||||
|
||||
2003. The module system.
|
||||
|
||||
2004. Ljunglöf's thesis //Expressivity and Complexity of GF//.
|
||||
|
||||
|
||||
|
||||
#NEW
|
||||
|
||||
==Some key ingredients of GF in other grammar formalisms==
|
||||
|
||||
- [GF ]: Grammatical Framework
|
||||
- [CG ]: categorial grammar
|
||||
- [ACG ]: abstract categorial grammar
|
||||
- [HPSG ]: head-driven phrase structure grammar
|
||||
- [LFG ]: lexical functional grammar
|
||||
|
||||
|
||||
| / | GF | ACG | LFG | HPSG | CG |
|
||||
| abstract vs concrete syntax | X | X | ? | - | - |
|
||||
| type theory | X | X | - | - | X |
|
||||
| records and features | X | - | X | X | - |
|
||||
|
||||
|
||||
#NEW
|
||||
|
||||
==Examples of descriptions in each formalism==
|
||||
|
||||
To be written...
|
||||
|
||||
|
||||
#NEW
|
||||
|
||||
==Lambda terms and records==
|
||||
|
||||
In CS, abstract syntax is trees and concrete syntax is strings.
|
||||
This works more or less for programming languages.
|
||||
|
||||
In CG, all syntax is lambda terms.
|
||||
|
||||
In Montague grammar, abstract syntax is lambda terms and
|
||||
concrete syntax is trees. Abstract syntax as lambda terms
|
||||
can be considered well-established.
|
||||
|
||||
In PATR and HPSG, concrete syntax is records. This can be considered
|
||||
well-established for natural languages.
|
||||
|
||||
In ACG, both are lambda terms. This is more general than GF,
|
||||
but reversibility requires linearity restriction, which can be
|
||||
unnatural for grammar writing.
|
||||
|
||||
In GF, linearization from lambda terms to records is reversible,
|
||||
and grammar writing is not restricted to linear terms.
|
||||
|
||||
Grammar composition in ACG is just function composition. In GF,
|
||||
it is more restricted...
|
||||
|
||||
|
||||
#NEW
|
||||
|
||||
==The structure of GF formalisms==
|
||||
|
||||
The following diagram (to be drawn properly!) describes the
|
||||
levels.
|
||||
```
|
||||
| programming language design
|
||||
V
|
||||
GF source language
|
||||
|
|
||||
| type-directed partial evaluation
|
||||
V
|
||||
GFC assembly language
|
||||
|
|
||||
| Ljunglöf's translation
|
||||
V
|
||||
MCFG parser
|
||||
```
|
||||
The last two phases are nontrivial mathematical properties.
|
||||
|
||||
In most grammar formalisms, grammarians have to work on the GFC
|
||||
(or MCFG) level.
|
||||
|
||||
Maybe they use macros - they are therefore like macro assemblers. But there
|
||||
are no separately compiled library modules, no type checking, etc.
|
||||
|
||||
|
||||
#NEW
|
||||
|
||||
==The expressivity of GF==
|
||||
|
||||
Parsing complexity is the same as MCFG: polynomial, with
|
||||
unrestricted exponent depending on grammar.
|
||||
This is between TAG and HPSG.
|
||||
|
||||
If semantic well-formedness (type theory) is taken into account,
|
||||
then arbitrary logic can be expressed. The well-formedness of
|
||||
abstract syntax is decidable, but the well-formedness of a
|
||||
concrete-syntax string can require an arbitrary proof construction
|
||||
and is therefore undecidable.
|
||||
|
||||
Separability between AS and CS: like TAG (Tree Adjoining Grammar), GF
|
||||
has the goal of assigning intended trees for strings. This is
|
||||
generalized to shared trees for different languages.
|
||||
|
||||
The high-level language strives after the properties of
|
||||
writability and readability (programming language notions).
|
||||
|
||||
|
||||
#NEW
|
||||
|
||||
==Grammars and parsing==
|
||||
|
||||
In many projects, a grammar is just seen as a **declarative parsing program**.
|
||||
|
||||
For GF, a grammar is primarily the **definition of a language**.
|
||||
|
||||
Detaching grammars from parsers is a good idea, giving
|
||||
- more efficient and robust parsing (statistical etc)
|
||||
- cleaner grammars
|
||||
|
||||
|
||||
Separating abstract from concrete syntax is a prerequisite for this:
|
||||
we want parsers to return abstract syntax objects, and these must exist
|
||||
independently of parse trees.
|
||||
|
||||
A possible radical approach to parsing:
|
||||
use a grammar to generate a treebank and machine-learn
|
||||
a statistical parser from this.
|
||||
|
||||
Comparison: Steedman in CCG has done something like this.
|
||||
|
||||
|
||||
#NEW
|
||||
|
||||
==Grammars as software libraries==
|
||||
|
||||
Reuse for different purposes.
|
||||
|
||||
Grammar composition.
|
||||
|
||||
|
||||
#NEW
|
||||
|
||||
==Multilinguality==
|
||||
|
||||
In **application grammars**, the AS is a semantic
|
||||
model, and a CS covers domain terminology and idioms.
|
||||
|
||||
This can give publication-quality translation on
|
||||
limited domains (e.g. the WebALT project).
|
||||
|
||||
Resource grammars with grammar composition lead to
|
||||
**compile-time transfer**.
|
||||
|
||||
When is **run-time transfer** necessary?
|
||||
|
||||
Cf. CLE (Core Language Engine).
|
||||
|
||||
|
||||
#NEW
|
||||
|
||||
==Parametrized modules==
|
||||
|
||||
This notion comes from the ML language in the 1980's.
|
||||
|
||||
It can be used for sharing even more code between languages
|
||||
than their AS.
|
||||
|
||||
Especially, for related languages (Scandinavian, Romance).
|
||||
|
||||
Cf. grammar porting in CLE: what they do with untyped
|
||||
macro packages GF does with typable interfaces.
|
||||
@@ -1,311 +0,0 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
||||
<HTML>
|
||||
<HEAD>
|
||||
<META NAME="generator" CONTENT="http://txt2tags.sf.net">
|
||||
<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=iso-8859-1">
|
||||
<TITLE>GF Project Ideas</TITLE>
|
||||
</HEAD><BODY BGCOLOR="white" TEXT="black">
|
||||
|
||||
<P>
|
||||
<center>
|
||||
<IMG ALIGN="middle" SRC="Logos/gf0.png" BORDER="0" ALT="">
|
||||
</center>
|
||||
</P>
|
||||
|
||||
<P ALIGN="center"><CENTER>
|
||||
<H1>GF Project Ideas</H1>
|
||||
<FONT SIZE="4">
|
||||
<I>Resource Grammars, Web Applications, etc</I><BR>
|
||||
contact: Aarne Ranta (aarne at chalmers dot se)
|
||||
</FONT></CENTER>
|
||||
|
||||
<P></P>
|
||||
<HR NOSHADE SIZE=1>
|
||||
<P></P>
|
||||
<UL>
|
||||
<LI><A HREF="#toc1">Resource Grammar Implementations</A>
|
||||
<UL>
|
||||
<LI><A HREF="#toc2">Tasks</A>
|
||||
<LI><A HREF="#toc3">Who is qualified</A>
|
||||
<LI><A HREF="#toc4">The Summer School</A>
|
||||
</UL>
|
||||
<LI><A HREF="#toc5">Other project ideas</A>
|
||||
<UL>
|
||||
<LI><A HREF="#toc6">GF interpreter in Java</A>
|
||||
<LI><A HREF="#toc7">GF interpreter in C#</A>
|
||||
<LI><A HREF="#toc8">GF localization library</A>
|
||||
<LI><A HREF="#toc9">Multilingual grammar applications for mobile phones</A>
|
||||
<LI><A HREF="#toc10">Multilingual grammar applications for the web</A>
|
||||
<LI><A HREF="#toc11">GMail gadget for GF</A>
|
||||
</UL>
|
||||
<LI><A HREF="#toc12">Dissemination and intellectual property</A>
|
||||
</UL>
|
||||
|
||||
<P></P>
|
||||
<HR NOSHADE SIZE=1>
|
||||
<P></P>
|
||||
<A NAME="toc1"></A>
|
||||
<H2>Resource Grammar Implementations</H2>
|
||||
<P>
|
||||
GF Resource Grammar Library is an open-source computational grammar resource
|
||||
that currently covers 12 languages.
|
||||
The Library is a collaborative effort to which programmers from many countries
|
||||
have contributed. The next goal is to extend the library
|
||||
to all of the 23 official EU languages. Also other languages
|
||||
are welcome all the time. The following diagram shows the current status of the
|
||||
library. Each of the red and yellow ones is a potential project.
|
||||
</P>
|
||||
<P>
|
||||
<center>
|
||||
<IMG ALIGN="middle" SRC="school-langs.png" BORDER="0" ALT="">
|
||||
</center>
|
||||
</P>
|
||||
<P>
|
||||
<I>red=wanted, green=exists, orange=in-progress, solid=official-eu, dotted=non-eu</I>
|
||||
</P>
|
||||
<P>
|
||||
The linguistic coverage of the library includes the inflectional morphology
|
||||
and basic syntax of each language. It can be used in GF applications
|
||||
and also ported to other formats. It can also be used for building other
|
||||
linguistic resources, such as morphological lexica and parsers.
|
||||
The library is licensed under LGPL.
|
||||
</P>
|
||||
<A NAME="toc2"></A>
|
||||
<H3>Tasks</H3>
|
||||
<P>
|
||||
Writing a grammar for a language is usually easier if other languages
|
||||
from the same family already have grammars. The colours have the same
|
||||
meaning as in the diagram above; in addition, we use boldface for the
|
||||
red, still unimplemented languages and italics for the
|
||||
orange languages in progress. Thus, in particular, each of the languages
|
||||
coloured red below are possible programming projects.
|
||||
</P>
|
||||
<P>
|
||||
Baltic:
|
||||
</P>
|
||||
<UL>
|
||||
<LI><font color="red"><b> Latvian </b></font>
|
||||
<LI><font color="red"><b> Lithuanian </b></font>
|
||||
</UL>
|
||||
|
||||
<P>
|
||||
Celtic:
|
||||
</P>
|
||||
<UL>
|
||||
<LI><font color="red"><b> Irish </b></font>
|
||||
</UL>
|
||||
|
||||
<P>
|
||||
Fenno-Ugric:
|
||||
</P>
|
||||
<UL>
|
||||
<LI><font color="red"><b> Estonian </b></font>
|
||||
<LI><font color="green" size="-1"> Finnish </font>
|
||||
<LI><font color="red"><b> Hungarian </b></font>
|
||||
</UL>
|
||||
|
||||
<P>
|
||||
Germanic:
|
||||
</P>
|
||||
<UL>
|
||||
<LI><font color="green" size="-1"> Danish </font>
|
||||
<LI><font color="red"><b> Dutch </b></font>
|
||||
<LI><font color="green" size="-1"> English </font>
|
||||
<LI><font color="green" size="-1"> German </font>
|
||||
<LI><font color="green" size="-1"> Norwegian </font>
|
||||
<LI><font color="green" size="-1"> Swedish </font>
|
||||
</UL>
|
||||
|
||||
<P>
|
||||
Hellenic:
|
||||
</P>
|
||||
<UL>
|
||||
<LI><font color="red"><b> Greek </b></font>
|
||||
</UL>
|
||||
|
||||
<P>
|
||||
Indo-Iranian:
|
||||
</P>
|
||||
<UL>
|
||||
<LI><font color="orange"><i> Hindi </i></font>
|
||||
<LI><font color="orange"><i> Urdu </i></font>
|
||||
</UL>
|
||||
|
||||
<P>
|
||||
Romance:
|
||||
</P>
|
||||
<UL>
|
||||
<LI><font color="green" size="-1"> Catalan </font>
|
||||
<LI><font color="green" size="-1"> French </font>
|
||||
<LI><font color="green" size="-1"> Italian </font>
|
||||
<LI><font color="red"><b> Portuguese </b></font>
|
||||
<LI><font color="orange"><i> Romanian </i></font>
|
||||
<LI><font color="green" size="-1"> Spanish </font>
|
||||
</UL>
|
||||
|
||||
<P>
|
||||
Semitic:
|
||||
</P>
|
||||
<UL>
|
||||
<LI><font color="orange"><i> Arabic </i></font>
|
||||
<LI><font color="red"><b> Maltese </b></font>
|
||||
</UL>
|
||||
|
||||
<P>
|
||||
Slavonic:
|
||||
</P>
|
||||
<UL>
|
||||
<LI><font color="green" size="-1"> Bulgarian </font>
|
||||
<LI><font color="red"><b> Czech </b></font>
|
||||
<LI><font color="orange"><i> Polish </i></font>
|
||||
<LI><font color="green" size="-1"> Russian </font>
|
||||
<LI><font color="red"><b> Slovak </b></font>
|
||||
<LI><font color="red"><b> Slovenian </b></font>
|
||||
</UL>
|
||||
|
||||
<P>
|
||||
Tai:
|
||||
</P>
|
||||
<UL>
|
||||
<LI><font color="orange"><i> Thai </i></font>
|
||||
</UL>
|
||||
|
||||
<P>
|
||||
Turkic:
|
||||
</P>
|
||||
<UL>
|
||||
<LI><font color="orange"><i> Turkish </i></font>
|
||||
</UL>
|
||||
|
||||
<A NAME="toc3"></A>
|
||||
<H3>Who is qualified</H3>
|
||||
<P>
|
||||
Writing a resource grammar implementation requires good general programming
|
||||
skills, and a good explicit knowledge of the grammar of the target language.
|
||||
A typical participant could be
|
||||
</P>
|
||||
<UL>
|
||||
<LI>native or fluent speaker of the target language
|
||||
<LI>interested in languages on the theoretical level, and preferably familiar
|
||||
with many languages (to be able to think about them on an abstract level)
|
||||
<LI>familiar with functional programming languages such as ML or Haskell
|
||||
(GF itself is a language similar to these)
|
||||
<LI>on Master's or PhD level in linguistics, computer science, or mathematics
|
||||
</UL>
|
||||
|
||||
<P>
|
||||
But it is the quality of the assignment that is assessed, not any formal
|
||||
requirements. The "typical participant" was described to give an idea of
|
||||
who is likely to succeed in this.
|
||||
</P>
|
||||
<A NAME="toc4"></A>
|
||||
<H3>The Summer School</H3>
|
||||
<P>
|
||||
A Summer School on resource grammars and applications will
|
||||
be organized at the campus of Chalmers University of Technology in Gothenburg,
|
||||
Sweden, on 17-28 August 2009. It can be seen as a natural checkpoint in
|
||||
a resource grammar project; the participants are assumed to learn GF before
|
||||
the Summer School, but how far they have come in their projects may vary.
|
||||
</P>
|
||||
<P>
|
||||
More information on the Summer School web page:
|
||||
</P>
|
||||
<P>
|
||||
<A HREF="http://www.cs.chalmers.se/Cs/Research/Language-technology/GF/doc/gf-summerschool.html"><CODE>http://www.cs.chalmers.se/Cs/Research/Language-technology/GF/doc/gf-summerschool.html</CODE></A>
|
||||
</P>
|
||||
<A NAME="toc5"></A>
|
||||
<H2>Other project ideas</H2>
|
||||
<A NAME="toc6"></A>
|
||||
<H3>GF interpreter in Java</H3>
|
||||
<P>
|
||||
The idea is to write a run-time system for GF grammars in Java. This enables
|
||||
the use of <B>embedded grammars</B> in Java applications. This project is
|
||||
a fresh-up of <A HREF="http://www.cs.chalmers.se/~bringert/gf/gf-java.html">earlier work</A>,
|
||||
now using the new run-time format PGF and addressing a new parsing algorithm.
|
||||
</P>
|
||||
<P>
|
||||
Requirements: Java, Haskell, basics of compilers and parsing algorithms.
|
||||
</P>
|
||||
<A NAME="toc7"></A>
|
||||
<H3>GF interpreter in C#</H3>
|
||||
<P>
|
||||
The idea is to write a run-time system for GF grammars in C#. This enables
|
||||
the use of <B>embedded grammars</B> in C# applications. This project is
|
||||
similar to <A HREF="http://www.cs.chalmers.se/~bringert/gf/gf-java.html">earlier work</A>
|
||||
on Java, now addressing C# and using the new run-time format PGF.
|
||||
</P>
|
||||
<P>
|
||||
Requirements: C#, Haskell, basics of compilers and parsing algorithms.
|
||||
</P>
|
||||
<A NAME="toc8"></A>
|
||||
<H3>GF localization library</H3>
|
||||
<P>
|
||||
This is an idea for a software localization library using GF grammars.
|
||||
The library should replace strings by grammar rules, which can be conceived
|
||||
as very smart templates always guaranteeing grammatically correct output.
|
||||
The library should be based on the
|
||||
<A HREF="http://www.cs.chalmers.se/Cs/Research/Language-technology/GF/lib/resource/doc/synopsis.html">GF Resource Grammar Library</A>, providing infrastructure
|
||||
currently for 12 languages.
|
||||
</P>
|
||||
<P>
|
||||
Requirements: GF, some natural languages, some localization platform
|
||||
</P>
|
||||
<A NAME="toc9"></A>
|
||||
<H3>Multilingual grammar applications for mobile phones</H3>
|
||||
<P>
|
||||
GF grammars can be compiled into programs that can be run on different
|
||||
platforms, such as web browsers and mobile phones. An example is a
|
||||
<A HREF="http://www.cs.chalmers.se/Cs/Research/Language-technology/GF/demos/index-numbers.html">numeral translator</A> running on both these platforms.
|
||||
</P>
|
||||
<P>
|
||||
The proposed project is rather open: find some cool applications of
|
||||
the technology that are useful or entertaining for mobile phone users. A
|
||||
part of the project is to investigate implementation issues such as making
|
||||
the best use of the phone's resources. Possible applications have
|
||||
something to do with translation; one suggestion is an sms editor/translator.
|
||||
</P>
|
||||
<P>
|
||||
Requirements: GF, JavaScript, some phone application development tools
|
||||
</P>
|
||||
<A NAME="toc10"></A>
|
||||
<H3>Multilingual grammar applications for the web</H3>
|
||||
<P>
|
||||
This project is rather open: find some cool applications of
|
||||
the technology that are useful or entertaining on the web. Examples include
|
||||
</P>
|
||||
<UL>
|
||||
<LI>translators: see <A HREF="http://tournesol.cs.chalmers.se:41296/translate">demo</A>
|
||||
<LI>multilingual wikis: see <A HREF="http://csmisc14.cs.chalmers.se/~meza/restWiki/wiki.cgi">demo</A>
|
||||
<LI>fridge magnets: see <A HREF="http://tournesol.cs.chalmers.se:41296/fridge">demo</A>
|
||||
</UL>
|
||||
|
||||
<P>
|
||||
Requirements: GF, JavaScript or Java and Google Web Toolkit, CGI
|
||||
</P>
|
||||
<A NAME="toc11"></A>
|
||||
<H3>GMail gadget for GF</H3>
|
||||
<P>
|
||||
It is possible to add custom gadgets to GMail. If you are going to write
|
||||
e-mail in a foreign language then you probably will need help from
|
||||
a dictionary or you may want to check something in the grammar. GF provides
|
||||
all resources that you may need but you have to think about how to
|
||||
design a gadget that fits well in the GMail environment and what
|
||||
functionality from GF you want to expose.
|
||||
</P>
|
||||
<P>
|
||||
Requirements: GF, Google Web Toolkit
|
||||
</P>
|
||||
<A NAME="toc12"></A>
|
||||
<H2>Dissemination and intellectual property</H2>
|
||||
<P>
|
||||
All code suggested here will be released under the LGPL just like
|
||||
the current resource grammars and run-time GF libraries,
|
||||
with the copyright held by respective authors.
|
||||
</P>
|
||||
<P>
|
||||
As a rule, the code will be distributed via the GF web site.
|
||||
</P>
|
||||
|
||||
<!-- html code generated by txt2tags 2.4 (http://txt2tags.sf.net) -->
|
||||
<!-- cmdline: txt2tags -\-toc gf-ideas.txt -->
|
||||
</BODY></HTML>
|
||||
231
doc/gf-ideas.txt
@@ -1,231 +0,0 @@
|
||||
GF Project Ideas
|
||||
Resource Grammars, Web Applications, etc
|
||||
contact: Aarne Ranta (aarne at chalmers dot se)
|
||||
|
||||
%!Encoding : iso-8859-1
|
||||
|
||||
%!target:html
|
||||
%!postproc(html): #BECE <center>
|
||||
%!postproc(html): #ENCE </center>
|
||||
%!postproc(html): #GRAY <font color="green" size="-1">
|
||||
%!postproc(html): #EGRAY </font>
|
||||
%!postproc(html): #RED <font color="red"><b>
|
||||
%!postproc(html): #YELLOW <font color="orange"><i>
|
||||
%!postproc(html): #ERED </b></font>
|
||||
%!postproc(html): #EYELLOW </i></font>
|
||||
|
||||
#BECE
|
||||
[Logos/gf0.png]
|
||||
#ENCE
|
||||
|
||||
|
||||
==Resource Grammar Implementations==
|
||||
|
||||
GF Resource Grammar Library is an open-source computational grammar resource
|
||||
that currently covers 12 languages.
|
||||
The Library is a collaborative effort to which programmers from many countries
|
||||
have contributed. The next goal is to extend the library
|
||||
to all of the 23 official EU languages. Also other languages
|
||||
are welcome all the time. The following diagram shows the current status of the
|
||||
library. Each of the red and yellow ones is a potential project.
|
||||
|
||||
#BECE
|
||||
[school-langs.png]
|
||||
#ENCE
|
||||
|
||||
|
||||
//red=wanted, green=exists, orange=in-progress, solid=official-eu, dotted=non-eu//
|
||||
|
||||
The linguistic coverage of the library includes the inflectional morphology
|
||||
and basic syntax of each language. It can be used in GF applications
|
||||
and also ported to other formats. It can also be used for building other
|
||||
linguistic resources, such as morphological lexica and parsers.
|
||||
The library is licensed under LGPL.
|
||||
|
||||
|
||||
===Tasks===
|
||||
|
||||
Writing a grammar for a language is usually easier if other languages
|
||||
from the same family already have grammars. The colours have the same
|
||||
meaning as in the diagram above; in addition, we use boldface for the
|
||||
red, still unimplemented languages and italics for the
|
||||
orange languages in progress. Thus, in particular, each of the languages
|
||||
coloured red below are possible programming projects.
|
||||
|
||||
Baltic:
|
||||
- #RED Latvian #ERED
|
||||
- #RED Lithuanian #ERED
|
||||
|
||||
|
||||
Celtic:
|
||||
- #RED Irish #ERED
|
||||
|
||||
|
||||
Fenno-Ugric:
|
||||
- #RED Estonian #ERED
|
||||
- #GRAY Finnish #EGRAY
|
||||
- #RED Hungarian #ERED
|
||||
|
||||
|
||||
Germanic:
|
||||
- #GRAY Danish #EGRAY
|
||||
- #RED Dutch #ERED
|
||||
- #GRAY English #EGRAY
|
||||
- #GRAY German #EGRAY
|
||||
- #GRAY Norwegian #EGRAY
|
||||
- #GRAY Swedish #EGRAY
|
||||
|
||||
|
||||
Hellenic:
|
||||
- #RED Greek #ERED
|
||||
|
||||
|
||||
Indo-Iranian:
|
||||
- #YELLOW Hindi #EYELLOW
|
||||
- #YELLOW Urdu #EYELLOW
|
||||
|
||||
|
||||
Romance:
|
||||
- #GRAY Catalan #EGRAY
|
||||
- #GRAY French #EGRAY
|
||||
- #GRAY Italian #EGRAY
|
||||
- #RED Portuguese #ERED
|
||||
- #YELLOW Romanian #EYELLOW
|
||||
- #GRAY Spanish #EGRAY
|
||||
|
||||
|
||||
Semitic:
|
||||
- #YELLOW Arabic #EYELLOW
|
||||
- #RED Maltese #ERED
|
||||
|
||||
|
||||
Slavonic:
|
||||
- #GRAY Bulgarian #EGRAY
|
||||
- #RED Czech #ERED
|
||||
- #YELLOW Polish #EYELLOW
|
||||
- #GRAY Russian #EGRAY
|
||||
- #RED Slovak #ERED
|
||||
- #RED Slovenian #ERED
|
||||
|
||||
|
||||
Tai:
|
||||
- #YELLOW Thai #EYELLOW
|
||||
|
||||
|
||||
Turkic:
|
||||
- #YELLOW Turkish #EYELLOW
|
||||
|
||||
|
||||
===Who is qualified===
|
||||
|
||||
Writing a resource grammar implementation requires good general programming
|
||||
skills, and a good explicit knowledge of the grammar of the target language.
|
||||
A typical participant could be
|
||||
- native or fluent speaker of the target language
|
||||
- interested in languages on the theoretical level, and preferably familiar
|
||||
with many languages (to be able to think about them on an abstract level)
|
||||
- familiar with functional programming languages such as ML or Haskell
|
||||
(GF itself is a language similar to these)
|
||||
- on Master's or PhD level in linguistics, computer science, or mathematics
|
||||
|
||||
|
||||
But it is the quality of the assignment that is assessed, not any formal
|
||||
requirements. The "typical participant" was described to give an idea of
|
||||
who is likely to succeed in this.
|
||||
|
||||
|
||||
===The Summer School===
|
||||
|
||||
A Summer School on resource grammars and applications will
|
||||
be organized at the campus of Chalmers University of Technology in Gothenburg,
|
||||
Sweden, on 17-28 August 2009. It can be seen as a natural checkpoint in
|
||||
a resource grammar project; the participants are assumed to learn GF before
|
||||
the Summer School, but how far they have come in their projects may vary.
|
||||
|
||||
More information on the Summer School web page:
|
||||
|
||||
[``http://www.cs.chalmers.se/Cs/Research/Language-technology/GF/doc/gf-summerschool.html`` http://www.cs.chalmers.se/Cs/Research/Language-technology/GF/doc/gf-summerschool.html]
|
||||
|
||||
|
||||
==Other project ideas==
|
||||
|
||||
===GF interpreter in Java===
|
||||
|
||||
The idea is to write a run-time system for GF grammars in Java. This enables
|
||||
the use of **embedded grammars** in Java applications. This project is
|
||||
a fresh-up of [earlier work http://www.cs.chalmers.se/~bringert/gf/gf-java.html],
|
||||
now using the new run-time format PGF and addressing a new parsing algorithm.
|
||||
|
||||
Requirements: Java, Haskell, basics of compilers and parsing algorithms.
|
||||
|
||||
|
||||
===GF interpreter in C#===
|
||||
|
||||
The idea is to write a run-time system for GF grammars in C#. This enables
|
||||
the use of **embedded grammars** in C# applications. This project is
|
||||
similar to [earlier work http://www.cs.chalmers.se/~bringert/gf/gf-java.html]
|
||||
on Java, now addressing C# and using the new run-time format PGF.
|
||||
|
||||
Requirements: C#, Haskell, basics of compilers and parsing algorithms.
|
||||
|
||||
|
||||
===GF localization library===
|
||||
|
||||
This is an idea for a software localization library using GF grammars.
|
||||
The library should replace strings by grammar rules, which can be conceived
|
||||
as very smart templates always guaranteeing grammatically correct output.
|
||||
The library should be based on the
|
||||
[GF Resource Grammar Library http://www.cs.chalmers.se/Cs/Research/Language-technology/GF/lib/resource/doc/synopsis.html], providing infrastructure
|
||||
currently for 12 languages.
|
||||
|
||||
Requirements: GF, some natural languages, some localization platform
|
||||
|
||||
|
||||
===Multilingual grammar applications for mobile phones===
|
||||
|
||||
GF grammars can be compiled into programs that can be run on different
|
||||
platforms, such as web browsers and mobile phones. An example is a
|
||||
[numeral translator http://www.cs.chalmers.se/Cs/Research/Language-technology/GF/demos/index-numbers.html] running on both these platforms.
|
||||
|
||||
The proposed project is rather open: find some cool applications of
|
||||
the technology that are useful or entertaining for mobile phone users. A
|
||||
part of the project is to investigate implementation issues such as making
|
||||
the best use of the phone's resources. Possible applications have
|
||||
something to do with translation; one suggestion is an sms editor/translator.
|
||||
|
||||
Requirements: GF, JavaScript, some phone application development tools
|
||||
|
||||
|
||||
===Multilingual grammar applications for the web===
|
||||
|
||||
This project is rather open: find some cool applications of
|
||||
the technology that are useful or entertaining on the web. Examples include
|
||||
- translators: see [demo http://129.16.250.57:41296/translate]
|
||||
- multilingual wikis: see [demo http://csmisc14.cs.chalmers.se/~meza/restWiki/wiki.cgi]
|
||||
- fridge magnets: see [demo http://129.16.250.57:41296/fridge]
|
||||
|
||||
|
||||
Requirements: GF, JavaScript or Java and Google Web Toolkit, CGI
|
||||
|
||||
|
||||
===GMail gadget for GF===
|
||||
|
||||
It is possible to add custom gadgets to GMail. If you are going to write
|
||||
e-mail in a foreign language then you probably will need help from
|
||||
a dictionary or you may want to check something in the grammar. GF provides
|
||||
all resources that you may need but you have to think about how to
|
||||
design a gadget that fits well in the GMail environment and what
|
||||
functionality from GF you want to expose.
|
||||
|
||||
Requirements: GF, Google Web Toolkit
|
||||
|
||||
|
||||
|
||||
==Dissemination and intellectual property==
|
||||
|
||||
All code suggested here will be released under the LGPL just like
|
||||
the current resource grammars and run-time GF libraries,
|
||||
with the copyright held by respective authors.
|
||||
|
||||
As a rule, the code will be distributed via the GF web site.
|
||||
|
||||
@@ -13,12 +13,13 @@
|
||||
|
||||
</center>
|
||||
|
||||
Most of the code is by
|
||||
<a href="http://www.chalmers.se/cse/EN/organization/divisions/computing-science/people/angelov-krasimir">Krasimir Angelov</a>,
|
||||
<a href="http://www.cs.chalmers.se/~bringert">Björn Bringert</a>,
|
||||
The current developers and maintainers are
|
||||
<a href="http://www.chalmers.se/cse/EN/organization/divisions/computing-science/people/angelov-krasimir">Krasimir Angelov</a>,
|
||||
<a href="http://www.cs.chalmers.se/~hallgren">Thomas Hallgren</a>,
|
||||
and
|
||||
<a href="http://www.cs.chalmers.se/~aarne">Aarne Ranta</a>. Bug reports should be
|
||||
posted via the <a href="http://trac.haskell.org/gf/">GF bug tracker</a>.
|
||||
<a href="http://www.cse.chalmers.se/~aarne">Aarne Ranta</a>. Bug reports should be
|
||||
posted via the
|
||||
<a href="http://code.google.com/p/grammatical-framework/issues/list">GF bug tracker</a>.
|
||||
|
||||
|
||||
<p>
|
||||
@@ -27,19 +28,23 @@ Also the following people have contributed code to some of the versions:
|
||||
|
||||
<p>
|
||||
|
||||
Håkan Burden (Chalmers)
|
||||
Grégoire Détrez (University of Gothenburg)
|
||||
<br>
|
||||
Ramona Enache (University of Gothenburg)
|
||||
<br>
|
||||
<a href="http://www.cse.chalmers.se/alumni/bringert">Björn Bringert</a> (University of Gothenburg)
|
||||
<br>
|
||||
Håkan Burden (University of Gothenburg)
|
||||
<br>
|
||||
Hans-Joachim Daniels (Karlsruhe)
|
||||
<br>
|
||||
<a href="http://www.cs.chalmers.se/~markus">Markus Forsberg</a> (Chalmers)
|
||||
<br>
|
||||
<a href="http://www.cs.chalmers.se/~hallgren">Thomas Hallgren</a> (Chalmers)
|
||||
<a href="http://www.cs.chalmers.se/~krijo">Kristofer Johannisson</a> (University of Gothenburg)
|
||||
<br>
|
||||
<a href="http://www.cs.chalmers.se/~krijo">Kristofer Johannisson</a> (Chalmers)
|
||||
<a href="http://www.cs.chalmers.se/~janna">Janna Khegai</a> (Chalmers)
|
||||
<br>
|
||||
<a href="http://www.cs.chalmers.se/~janna">Janna Khegai</a> (Chalmers)
|
||||
<br>
|
||||
<a href="http://www.cs.chalmers.se/~peb">Peter Ljunglöf</a> (Chalmers)
|
||||
<a href="http://www.cs.chalmers.se/~peb">Peter Ljunglöf</a> (University of Gothenburg)
|
||||
<br>
|
||||
Petri Mäenpää (Nokia)
|
||||
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
<p>
|
||||
Aarne Ranta
|
||||
<p>
|
||||
3 September, 2007
|
||||
22 December 2010 (3 September, 2007)
|
||||
|
||||
<p>
|
||||
|
||||
@@ -20,7 +20,7 @@ Aarne Ranta
|
||||
This Quick Start shows two examples of how GF can be used.
|
||||
We assume that you have downloaded and installed GF, so that
|
||||
the command <tt>gf</tt> works for you. See download and install
|
||||
instructions <a href="http://digitalgrammars.com/gf/download/">here</a>.
|
||||
instructions <a href="../download/index.html">here</a>.
|
||||
|
||||
|
||||
|
||||
@@ -61,39 +61,11 @@ and start GF again with the same command. Now you can even translate
|
||||
<i>this bread is very Italian</i>.
|
||||
</ol>
|
||||
To learn more about GF commands and
|
||||
grammar development, go to the
|
||||
<a href="tutorial/gf-tutorial2.html">New Grammarian's Tutorial</a>.
|
||||
grammar development, go to one of the tutorials:
|
||||
<ul>
|
||||
<li> <a href="tutorial/gf-tutorial.html">GF Tutorial</a>: older, more programmer-oriented
|
||||
<li> <a href="gf-lrec-2010.pdf">GF Resource Tutorial</a>: newer, more linguist-oriented
|
||||
</ul>
|
||||
|
||||
|
||||
|
||||
<h2>Multilingual authoring</h2>
|
||||
|
||||
This demo also requires the GUI package, which makes the command
|
||||
<tt>jgf</tt> work for you.
|
||||
<ol>
|
||||
<li> Download the file <a href="../examples/letter/Letter.gfcm"><tt>Letter.gfcm</tt></a>.
|
||||
<li> Start the GF editor by the command
|
||||
<pre>
|
||||
gfeditor Letter.gfcm
|
||||
</pre>
|
||||
<li> When the editor window is open, select "Letter" from the "New" menu.
|
||||
<li> Push the button "Random" in the lower end of the window.
|
||||
<li> Move the pointer to some place in the text, e.g. to the first word (in any
|
||||
of the languages), and click. The first word should now be highlighted and
|
||||
a number of alternatives appear in the lower window part (a similar situation
|
||||
is shown in the picture below).
|
||||
<li> Double-click at some of the alternatives marked "ch ..." and observe how
|
||||
the text changes in each of the languages.
|
||||
</ol>
|
||||
See the <a href="http://www.cs.chalmers.se/~aarne/GF2.0/doc/javaGUImanual/javaGUImanual.htm">Editor User Manual</a>
|
||||
for more information on how to use the
|
||||
editor. To change the grammars, you should not edit <tt>Letter.gfcm</tt>,
|
||||
which is low-level code generated by the GF grammar compiler. Instead, you
|
||||
can edit the files in <tt>examples/letter</tt> in the GF grammar package,
|
||||
and compile by using the script <tt>mkLetter.gfs</tt> in the same package.
|
||||
|
||||
<p>
|
||||
|
||||
<img src="quick-editor.gif">
|
||||
|
||||
</body></html>
|
||||
|
||||
@@ -106,7 +106,7 @@ This document is not an introduction to GF; such introduction can be
|
||||
found in the GF tutorial available on line on the GF web page,
|
||||
</P>
|
||||
<P>
|
||||
<A HREF="http://digitalgrammars.com/gf"><CODE>digitalgrammars.com/gf</CODE></A>
|
||||
<A HREF="http://grammaticalframework.org"><CODE>grammaticalframework.org</CODE></A>
|
||||
</P>
|
||||
<P>
|
||||
This manual covers only the language, not the GF compiler or
|
||||
|
||||
@@ -1,289 +0,0 @@
|
||||
(Adapted from KeY statistics by Vladimir Klebanov)
|
||||
|
||||
This is GF right now:
|
||||
|
||||
Total Physical Source Lines of Code (SLOC) = 42,467
|
||||
|
||||
Development Effort Estimate, Person-Years (Person-Months) = 10.24 (122.932)
|
||||
(Basic COCOMO model, Person-Months = 2.4 * (KSLOC**1.05))
|
||||
|
||||
Schedule Estimate, Years (Months) = 1.30 (15.56)
|
||||
(Basic COCOMO model, Months = 2.5 * (person-months**0.38))
|
||||
|
||||
Estimated Average Number of Developers (Effort/Schedule) = 7.90
|
||||
|
||||
Total Estimated Cost to Develop = $ 1,383,870
|
||||
(average salary = $56,286/year, overhead = 2.40).
|
||||
|
||||
SLOCCount, Copyright (C) 2001-2004 David A. Wheeler
|
||||
|
||||
|
||||
|
||||
----------- basis of counting: Haskell code + BNFC code - generated Happy parsers
|
||||
|
||||
-- GF/src% wc -l *.hs GF/*.hs GF/*/*.hs GF/*/*/*.hs GF/*/*.cf JavaGUI/*.java
|
||||
-- date Fri Jun 3 10:00:31 CEST 2005
|
||||
|
||||
104 GF.hs
|
||||
402 GF/API.hs
|
||||
98 GF/GFModes.hs
|
||||
379 GF/Shell.hs
|
||||
4 GF/Today.hs
|
||||
43 GF/API/BatchTranslate.hs
|
||||
145 GF/API/GrammarToHaskell.hs
|
||||
77 GF/API/IOGrammar.hs
|
||||
25 GF/API/MyParser.hs
|
||||
177 GF/Canon/AbsGFC.hs
|
||||
37 GF/Canon/ByLine.hs
|
||||
192 GF/Canon/CanonToGrammar.hs
|
||||
293 GF/Canon/CMacros.hs
|
||||
79 GF/Canon/GetGFC.hs
|
||||
86 GF/Canon/GFC.hs
|
||||
291 GF/Canon/LexGFC.hs
|
||||
201 GF/Canon/Look.hs
|
||||
235 GF/Canon/MkGFC.hs
|
||||
46 GF/Canon/PrExp.hs
|
||||
352 GF/Canon/PrintGFC.hs
|
||||
147 GF/Canon/Share.hs
|
||||
207 GF/Canon/SkelGFC.hs
|
||||
46 GF/Canon/TestGFC.hs
|
||||
49 GF/Canon/Unlex.hs
|
||||
202 GF/CF/CanonToCF.hs
|
||||
213 GF/CF/CF.hs
|
||||
217 GF/CF/CFIdent.hs
|
||||
62 GF/CF/CFtoGrammar.hs
|
||||
47 GF/CF/CFtoSRG.hs
|
||||
206 GF/CF/ChartParser.hs
|
||||
191 GF/CF/EBNF.hs
|
||||
45 GF/CFGM/AbsCFG.hs
|
||||
312 GF/CFGM/LexCFG.hs
|
||||
157 GF/CFGM/PrintCFG.hs
|
||||
109 GF/CFGM/PrintCFGrammar.hs
|
||||
85 GF/CF/PPrCF.hs
|
||||
150 GF/CF/PrLBNF.hs
|
||||
106 GF/CF/Profile.hs
|
||||
141 GF/Compile/BackOpt.hs
|
||||
763 GF/Compile/CheckGrammar.hs
|
||||
337 GF/Compile/Compile.hs
|
||||
136 GF/Compile/Extend.hs
|
||||
124 GF/Compile/GetGrammar.hs
|
||||
282 GF/Compile/GrammarToCanon.hs
|
||||
93 GF/Compile/MkConcrete.hs
|
||||
128 GF/Compile/MkResource.hs
|
||||
83 GF/Compile/MkUnion.hs
|
||||
146 GF/Compile/ModDeps.hs
|
||||
294 GF/Compile/NewRename.hs
|
||||
227 GF/Compile/Optimize.hs
|
||||
76 GF/Compile/PGrammar.hs
|
||||
84 GF/Compile/PrOld.hs
|
||||
119 GF/Compile/Rebuild.hs
|
||||
63 GF/Compile/RemoveLiT.hs
|
||||
274 GF/Compile/Rename.hs
|
||||
535 GF/Compile/ShellState.hs
|
||||
135 GF/Compile/Update.hs
|
||||
129 GF/Conversion/GFC.hs
|
||||
149 GF/Conversion/GFCtoSimple.hs
|
||||
53 GF/Conversion/MCFGtoCFG.hs
|
||||
46 GF/Conversion/RemoveEpsilon.hs
|
||||
102 GF/Conversion/RemoveErasing.hs
|
||||
82 GF/Conversion/RemoveSingletons.hs
|
||||
137 GF/Conversion/SimpleToFinite.hs
|
||||
26 GF/Conversion/SimpleToMCFG.hs
|
||||
230 GF/Conversion/Types.hs
|
||||
143 GF/Data/Assoc.hs
|
||||
118 GF/Data/BacktrackM.hs
|
||||
20 GF/Data/ErrM.hs
|
||||
119 GF/Data/GeneralDeduction.hs
|
||||
30 GF/Data/Glue.hs
|
||||
67 GF/Data/IncrementalDeduction.hs
|
||||
61 GF/Data/Map.hs
|
||||
662 GF/Data/Operations.hs
|
||||
127 GF/Data/OrdMap2.hs
|
||||
120 GF/Data/OrdSet.hs
|
||||
193 GF/Data/Parsers.hs
|
||||
64 GF/Data/RedBlack.hs
|
||||
150 GF/Data/RedBlackSet.hs
|
||||
19 GF/Data/SharedString.hs
|
||||
127 GF/Data/SortedList.hs
|
||||
134 GF/Data/Str.hs
|
||||
120 GF/Data/Trie2.hs
|
||||
129 GF/Data/Trie.hs
|
||||
71 GF/Data/Utilities.hs
|
||||
243 GF/Data/Zipper.hs
|
||||
78 GF/Embed/EmbedAPI.hs
|
||||
113 GF/Embed/EmbedCustom.hs
|
||||
137 GF/Embed/EmbedParsing.hs
|
||||
50 GF/Formalism/CFG.hs
|
||||
51 GF/Formalism/GCFG.hs
|
||||
58 GF/Formalism/MCFG.hs
|
||||
246 GF/Formalism/SimpleGFC.hs
|
||||
349 GF/Formalism/Utilities.hs
|
||||
30 GF/Fudgets/ArchEdit.hs
|
||||
134 GF/Fudgets/CommandF.hs
|
||||
51 GF/Fudgets/EventF.hs
|
||||
59 GF/Fudgets/FudgetOps.hs
|
||||
37 GF/Fudgets/UnicodeF.hs
|
||||
86 GF/Grammar/AbsCompute.hs
|
||||
38 GF/Grammar/Abstract.hs
|
||||
149 GF/Grammar/AppPredefined.hs
|
||||
312 GF/Grammar/Compute.hs
|
||||
215 GF/Grammar/Grammar.hs
|
||||
46 GF/Grammar/Lockfield.hs
|
||||
189 GF/Grammar/LookAbs.hs
|
||||
182 GF/Grammar/Lookup.hs
|
||||
745 GF/Grammar/Macros.hs
|
||||
340 GF/Grammar/MMacros.hs
|
||||
115 GF/Grammar/PatternMatch.hs
|
||||
279 GF/Grammar/PrGrammar.hs
|
||||
121 GF/Grammar/Refresh.hs
|
||||
44 GF/Grammar/ReservedWords.hs
|
||||
251 GF/Grammar/TC.hs
|
||||
301 GF/Grammar/TypeCheck.hs
|
||||
96 GF/Grammar/Unify.hs
|
||||
101 GF/Grammar/Values.hs
|
||||
89 GF/Infra/CheckM.hs
|
||||
43 GF/Infra/Comments.hs
|
||||
152 GF/Infra/Ident.hs
|
||||
390 GF/Infra/Modules.hs
|
||||
358 GF/Infra/Option.hs
|
||||
179 GF/Infra/Print.hs
|
||||
331 GF/Infra/ReadFiles.hs
|
||||
337 GF/Infra/UseIO.hs
|
||||
153 GF/OldParsing/CFGrammar.hs
|
||||
283 GF/OldParsing/ConvertFiniteGFC.hs
|
||||
121 GF/OldParsing/ConvertFiniteSimple.hs
|
||||
34 GF/OldParsing/ConvertGFCtoMCFG.hs
|
||||
122 GF/OldParsing/ConvertGFCtoSimple.hs
|
||||
44 GF/OldParsing/ConvertGrammar.hs
|
||||
52 GF/OldParsing/ConvertMCFGtoCFG.hs
|
||||
30 GF/OldParsing/ConvertSimpleToMCFG.hs
|
||||
43 GF/OldParsing/GCFG.hs
|
||||
86 GF/OldParsing/GeneralChart.hs
|
||||
148 GF/OldParsing/GrammarTypes.hs
|
||||
50 GF/OldParsing/IncrementalChart.hs
|
||||
206 GF/OldParsing/MCFGrammar.hs
|
||||
43 GF/OldParsing/ParseCFG.hs
|
||||
82 GF/OldParsing/ParseCF.hs
|
||||
177 GF/OldParsing/ParseGFC.hs
|
||||
37 GF/OldParsing/ParseMCFG.hs
|
||||
161 GF/OldParsing/SimpleGFC.hs
|
||||
188 GF/OldParsing/Utilities.hs
|
||||
51 GF/Parsing/CFG.hs
|
||||
66 GF/Parsing/CF.hs
|
||||
151 GF/Parsing/GFC.hs
|
||||
64 GF/Parsing/MCFG.hs
|
||||
83 GF/Printing/PrintParser.hs
|
||||
127 GF/Printing/PrintSimplifiedTerm.hs
|
||||
190 GF/Shell/CommandL.hs
|
||||
556 GF/Shell/Commands.hs
|
||||
524 GF/Shell/HelpFile.hs
|
||||
79 GF/Shell/JGF.hs
|
||||
171 GF/Shell/PShell.hs
|
||||
221 GF/Shell/ShellCommands.hs
|
||||
66 GF/Shell/SubShell.hs
|
||||
87 GF/Shell/TeachYourself.hs
|
||||
296 GF/Source/AbsGF.hs
|
||||
229 GF/Source/GrammarToSource.hs
|
||||
312 GF/Source/LexGF.hs
|
||||
528 GF/Source/PrintGF.hs
|
||||
353 GF/Source/SkelGF.hs
|
||||
657 GF/Source/SourceToGrammar.hs
|
||||
58 GF/Source/TestGF.hs
|
||||
72 GF/Speech/PrGSL.hs
|
||||
65 GF/Speech/PrJSGF.hs
|
||||
128 GF/Speech/SRG.hs
|
||||
103 GF/Speech/TransformCFG.hs
|
||||
30 GF/System/ArchEdit.hs
|
||||
90 GF/System/Arch.hs
|
||||
27 GF/System/NoReadline.hs
|
||||
27 GF/System/Readline.hs
|
||||
73 GF/System/Tracing.hs
|
||||
25 GF/System/UseReadline.hs
|
||||
63 GF/Text/Arabic.hs
|
||||
97 GF/Text/Devanagari.hs
|
||||
72 GF/Text/Ethiopic.hs
|
||||
99 GF/Text/ExtendedArabic.hs
|
||||
37 GF/Text/ExtraDiacritics.hs
|
||||
172 GF/Text/Greek.hs
|
||||
53 GF/Text/Hebrew.hs
|
||||
95 GF/Text/Hiragana.hs
|
||||
69 GF/Text/LatinASupplement.hs
|
||||
47 GF/Text/OCSCyrillic.hs
|
||||
45 GF/Text/Russian.hs
|
||||
77 GF/Text/Tamil.hs
|
||||
125 GF/Text/Text.hs
|
||||
69 GF/Text/Unicode.hs
|
||||
47 GF/Text/UTF8.hs
|
||||
56 GF/Translate/GFT.hs
|
||||
427 GF/UseGrammar/Custom.hs
|
||||
435 GF/UseGrammar/Editing.hs
|
||||
180 GF/UseGrammar/Generate.hs
|
||||
71 GF/UseGrammar/GetTree.hs
|
||||
143 GF/UseGrammar/Information.hs
|
||||
228 GF/UseGrammar/Linear.hs
|
||||
130 GF/UseGrammar/Morphology.hs
|
||||
70 GF/UseGrammar/Paraphrases.hs
|
||||
157 GF/UseGrammar/Parsing.hs
|
||||
66 GF/UseGrammar/Randomized.hs
|
||||
170 GF/UseGrammar/Session.hs
|
||||
186 GF/UseGrammar/Tokenize.hs
|
||||
43 GF/UseGrammar/Transfer.hs
|
||||
122 GF/Visualization/NewVisualizationGrammar.hs
|
||||
123 GF/Visualization/VisualizeGrammar.hs
|
||||
63 GF/Conversion/SimpleToMCFG/Coercions.hs
|
||||
256 GF/Conversion/SimpleToMCFG/Nondet.hs
|
||||
129 GF/Conversion/SimpleToMCFG/Strict.hs
|
||||
71 GF/OldParsing/ConvertGFCtoMCFG/Coercions.hs
|
||||
281 GF/OldParsing/ConvertGFCtoMCFG/Nondet.hs
|
||||
277 GF/OldParsing/ConvertGFCtoMCFG/Old.hs
|
||||
189 GF/OldParsing/ConvertGFCtoMCFG/Strict.hs
|
||||
70 GF/OldParsing/ConvertSimpleToMCFG/Coercions.hs
|
||||
245 GF/OldParsing/ConvertSimpleToMCFG/Nondet.hs
|
||||
277 GF/OldParsing/ConvertSimpleToMCFG/Old.hs
|
||||
139 GF/OldParsing/ConvertSimpleToMCFG/Strict.hs
|
||||
83 GF/OldParsing/ParseCFG/General.hs
|
||||
142 GF/OldParsing/ParseCFG/Incremental.hs
|
||||
156 GF/OldParsing/ParseMCFG/Basic.hs
|
||||
103 GF/Parsing/CFG/General.hs
|
||||
150 GF/Parsing/CFG/Incremental.hs
|
||||
98 GF/Parsing/CFG/PInfo.hs
|
||||
226 GF/Parsing/MCFG/Active2.hs
|
||||
304 GF/Parsing/MCFG/Active.hs
|
||||
144 GF/Parsing/MCFG/Incremental2.hs
|
||||
163 GF/Parsing/MCFG/Incremental.hs
|
||||
128 GF/Parsing/MCFG/Naive.hs
|
||||
163 GF/Parsing/MCFG/PInfo.hs
|
||||
194 GF/Parsing/MCFG/Range.hs
|
||||
183 GF/Parsing/MCFG/ViaCFG.hs
|
||||
167 GF/Canon/GFC.cf
|
||||
36 GF/CFGM/CFG.cf
|
||||
321 GF/Source/GF.cf
|
||||
272 JavaGUI/DynamicTree2.java
|
||||
272 JavaGUI/DynamicTree.java
|
||||
2357 JavaGUI/GFEditor2.java
|
||||
1420 JavaGUI/GFEditor.java
|
||||
30 JavaGUI/GrammarFilter.java
|
||||
13 JavaGUI/LinPosition.java
|
||||
18 JavaGUI/MarkedArea.java
|
||||
1552 JavaGUI/Numerals.java
|
||||
22 JavaGUI/Utils.java
|
||||
5956 total
|
||||
48713 total
|
||||
|
||||
- 2131 GF/Canon/ParGFC.hs
|
||||
3336 GF/Source/ParGF.hs
|
||||
779 GF/CFGM/ParCFG.hs
|
||||
|
||||
42467 total
|
||||
|
||||
--------
|
||||
|
||||
sloccount sloc =
|
||||
let
|
||||
ksloc = sloc / 1000
|
||||
effort = 2.4 * (ksloc ** 1.05)
|
||||
schedule = 2.5 * (effort ** 0.38)
|
||||
develops = effort / schedule
|
||||
cost = 56286 * (effort/12) * 2.4
|
||||
in
|
||||
[sloc,ksloc,effort,effort/12,schedule,schedule/12,develops,cost]
|
||||
@@ -1,533 +0,0 @@
|
||||
GF Resource Grammar Summer School
|
||||
Gothenburg, 17-28 August 2009
|
||||
Aarne Ranta (aarne at chalmers.se)
|
||||
|
||||
%!Encoding : iso-8859-1
|
||||
|
||||
%!target:html
|
||||
%!postproc(html): #BECE <center>
|
||||
%!postproc(html): #ENCE </center>
|
||||
%!postproc(html): #GRAY <font color="green" size="-1">
|
||||
%!postproc(html): #EGRAY </font>
|
||||
%!postproc(html): #RED <font color="red">
|
||||
%!postproc(html): #YELLOW <font color="orange">
|
||||
%!postproc(html): #ERED </font>
|
||||
|
||||
#BECE
|
||||
[school-langs.png]
|
||||
#ENCE
|
||||
|
||||
|
||||
//red=wanted, green=exists, orange=in-progress, solid=official-eu, dotted=non-eu//
|
||||
|
||||
|
||||
==News==
|
||||
|
||||
An on-line course //GF for Resource Grammar Writers// will start on
|
||||
Monday 20 April at 15.30 CEST. The slides and recordings of the five
|
||||
45-minute lectures will be made available via this web page. If requested,
|
||||
the course may be repeated in the beginning of the summer school.
|
||||
|
||||
|
||||
==Executive summary==
|
||||
|
||||
GF Resource Grammar Library is an open-source computational grammar resource
|
||||
that currently covers 12 languages.
|
||||
The Summer School is a part of a collaborative effort to extend the library
|
||||
to all of the 23 official EU languages. Also other languages
|
||||
chosen by the participants are welcome.
|
||||
|
||||
The missing EU languages are:
|
||||
Czech, Dutch, Estonian, Greek, Hungarian, Irish, Latvian, Lithuanian,
|
||||
Maltese, Portuguese, Slovak, and Slovenian. There is also more work to
|
||||
be done on Polish and Romanian.
|
||||
|
||||
The linguistic coverage of the library includes the inflectional morphology
|
||||
and basic syntax of each language. It can be used in GF applications
|
||||
and also ported to other formats. It can also be used for building other
|
||||
linguistic resources, such as morphological lexica and parsers.
|
||||
The library is licensed under LGPL.
|
||||
|
||||
In the summer school, each language will be implemented by one or two students
|
||||
working together. A morphology implementation will be credited
|
||||
as a Chalmers course worth 7.5 ECTS points; adding a syntax implementation
|
||||
will be worth more. The estimated total work load is 1-2 months for the
|
||||
morphology, and 3-6 months for the whole grammar.
|
||||
|
||||
Participation in the course is free. Registration is done via the courses's
|
||||
Google group, [``groups.google.com/group/gf-resource-school-2009/`` http://groups.google.com/group/gf-resource-school-2009/]. The registration deadline is 15 June 2009.
|
||||
|
||||
Some travel grants will be available. They are distributed on the basis of a
|
||||
GF programming contest in April and May.
|
||||
|
||||
The summer school will be held on 17-28 August 2009, at the campus of
|
||||
Chalmers University of Technology in Gothenburg, Sweden.
|
||||
|
||||
|
||||
[align6.png]
|
||||
|
||||
//Word alignment produced by GF from the resource grammar in Bulgarian, English, Italian, German, Finnish, French, and Swedish.//
|
||||
|
||||
==Introduction==
|
||||
|
||||
Since 2007, EU-27 has 23 official languages, listed in the diagram on top of this
|
||||
document. There is a growing need of linguistic resources for these
|
||||
languages, to help in tasks such as translation and information retrieval.
|
||||
These resources should be **portable** and **freely accessible**.
|
||||
Languages marked in red in the diagram are of particular interest for
|
||||
the summer school, since they are those on which the effort will be concentrated.
|
||||
|
||||
GF (Grammatical Framework,
|
||||
[``digitalgrammars.com/gf`` http://digitalgrammars.com/gf])
|
||||
is a **functional programming language** designed for writing natural
|
||||
language grammars. It provides an efficient platform for this task, due to
|
||||
its modern characteristics:
|
||||
- It is a functional programming language, similar to Haskell and ML.
|
||||
- It has a static type system and type checker.
|
||||
- It has a powerful module system supporting separate compilation
|
||||
and data abstraction.
|
||||
- It has an optimizing compiler to **Portable Grammar Format** (PGF).
|
||||
- PGF can be further compiled to other formats, such as JavaScript and
|
||||
speech recognition language models.
|
||||
- GF has a **resource grammar library** giving access to the morphology and
|
||||
basic syntax of 12 languages.
|
||||
|
||||
|
||||
In addition to "ordinary" grammars for single languages, GF
|
||||
supports **multilingual grammars**. A multilingual GF grammar consists of an
|
||||
**abstract syntax** and a set of **concrete syntaxes**.
|
||||
An abstract syntax is a system of **trees**, serving as a semantic
|
||||
model or an ontology. A concrete syntax is a mapping from abstract syntax
|
||||
trees to strings of a particular language.
|
||||
|
||||
These mappings defined in concrete syntax are **reversible**: they
|
||||
can be used both for **generating** strings from trees, and for
|
||||
**parsing** strings into trees. Combinations of generation and
|
||||
parsing can be used for **translation**, where the abstract
|
||||
syntax works as an **interlingua**. Thus GF has been used as a
|
||||
framework for building translation systems in several areas
|
||||
of application and large sets of languages.
|
||||
|
||||
|
||||
|
||||
==The GF resource grammar library==
|
||||
|
||||
The GF resource grammar library is a set of grammars usable as libraries when
|
||||
building translation systems and other applications.
|
||||
The library currently covers
|
||||
the 9 languages coloured in green in the diagram above; in addition,
|
||||
Catalan, Norwegian, and Russian are covered, and there is ongoing work on
|
||||
Arabic, Hindi/Urdu, Polish, Romanian, and Thai.
|
||||
|
||||
The purpose of the resource grammar library is to define the "low-level" structure
|
||||
of a language: inflection, word order, agreement. This structure belongs to what
|
||||
linguists call morphology and syntax. It can be very complex and requires
|
||||
a lot of knowledge. Yet, when translating from one language to
|
||||
another, knowing morphology and syntax is but a part of what is needed.
|
||||
The translator (whether human
|
||||
or machine) must understand the meaning of what is translated, and must also know
|
||||
the idiomatic way to express the meaning in the target language. This knowledge
|
||||
can be very domain-dependent and requires in general an expert in the field to
|
||||
reach high quality: a mathematician in the field of mathematics, a meteorologist
|
||||
in the field of weather reports, etc.
|
||||
|
||||
The problem is to find a person who is an expert in both the domain of translation
|
||||
and in the low-level linguistic details. It is the rareness of this combination
|
||||
that has made it difficult to build interlingua-based translation systems.
|
||||
The GF resource grammar library has the mission of helping in this task.
|
||||
It encapsulates the low-level linguistics in program modules
|
||||
accessed through easy-to-use interfaces.
|
||||
Experts on different domains can build translation systems by using the library,
|
||||
without knowing low-level linguistics. The idea is much the same as when a
|
||||
programmer builds a graphical user interface (GUI) from high-level elements such as
|
||||
buttons and menus, without having to care about pixels or geometrical forms.
|
||||
|
||||
|
||||
===Missing EU languages, by the family===
|
||||
|
||||
Writing a grammar for a language is usually easier if other languages
|
||||
from the same family already have grammars. The colours have the same
|
||||
meaning as in the diagram above.
|
||||
|
||||
Baltic:
|
||||
#RED Latvian #ERED
|
||||
#RED Lithuanian #ERED
|
||||
|
||||
Celtic:
|
||||
#RED Irish #ERED
|
||||
|
||||
Fenno-Ugric:
|
||||
#RED Estonian #ERED
|
||||
#GRAY Finnish #EGRAY
|
||||
#RED Hungarian #ERED
|
||||
|
||||
Germanic:
|
||||
#GRAY Danish #EGRAY
|
||||
#RED Dutch #ERED
|
||||
#GRAY English #EGRAY
|
||||
#GRAY German #EGRAY
|
||||
#GRAY Swedish #EGRAY
|
||||
|
||||
Hellenic:
|
||||
#RED Greek #ERED
|
||||
|
||||
Romance:
|
||||
#GRAY French #EGRAY
|
||||
#GRAY Italian #EGRAY
|
||||
#RED Portuguese #ERED
|
||||
#YELLOW Romanian #ERED
|
||||
#GRAY Spanish #EGRAY
|
||||
|
||||
Semitic:
|
||||
#RED Maltese #ERED
|
||||
|
||||
Slavonic:
|
||||
#GRAY Bulgarian #EGRAY
|
||||
#RED Czech #ERED
|
||||
#YELLOW Polish #ERED
|
||||
#RED Slovak #ERED
|
||||
#RED Slovenian #ERED
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
===Applications of the library===
|
||||
|
||||
In addition to translation, the library is also useful in **localization**,
|
||||
that is, porting a piece of software to new languages.
|
||||
The GF resource grammar library has been used in three major projects that need
|
||||
interlingua-based translation or localization of systems to new languages:
|
||||
- in KeY,
|
||||
[``http://www.key-project.org/`` http://www.key-project.org/],
|
||||
for writing formal and informal software specifications (3 languages)
|
||||
- in WebALT,
|
||||
[``http://webalt.math.helsinki.fi/content/index_eng.html`` http://webalt.math.helsinki.fi/content/index_eng.html],
|
||||
for translating mathematical exercises to 7 languages
|
||||
- in TALK [``http://www.talk-project.org`` http://www.talk-project.org],
|
||||
where the library was used for localizing spoken dialogue systems
|
||||
to six languages
|
||||
|
||||
|
||||
The library is also a generic **linguistic resource**,
|
||||
which can be used for tasks
|
||||
such as language teaching and information retrieval. The liberal license (LGPL)
|
||||
makes it usable for anyone and for any task. GF also has tools supporting the
|
||||
use of grammars in programs written in other
|
||||
programming languages: C, C++, Haskell,
|
||||
Java, JavaScript, and Prolog. In connection with the TALK project,
|
||||
support has also been
|
||||
developed for translating GF grammars to language models used in speech
|
||||
recognition (GSL/Nuance, HTK/ATK, SRGS, JSGF).
|
||||
|
||||
|
||||
|
||||
===The structure of the library===
|
||||
|
||||
The library has the following main parts:
|
||||
- **Inflection paradigms**, covering the inflection of each language.
|
||||
- **Core Syntax**, covering a large set of syntax rules that
|
||||
can be implemented for all languages involved.
|
||||
- **Common Test Lexicon**, giving ca. 500 common words that can be used for
|
||||
testing the library.
|
||||
- **Language-Specific Syntax Extensions**, covering syntax rules that are
|
||||
not implementable for all languages.
|
||||
- **Language-Specific Lexica**, word lists for each language, with
|
||||
accurate morphological and syntactic information.
|
||||
|
||||
|
||||
The goal of the summer school is to implement, for each language, at least
|
||||
the first three components. The latter three are more open-ended in character.
|
||||
|
||||
|
||||
==The summer school==
|
||||
|
||||
The goal of the summer school is to extend the GF resource grammar library
|
||||
to covering all 23 EU languages, which means we need 15 new languages.
|
||||
We also welcome other languages than these 23,
|
||||
if there are interested participants.
|
||||
|
||||
The amount of work and skill is between a Master's thesis and a PhD thesis.
|
||||
The Russian implementation was made by Janna Khegai as a part of her
|
||||
PhD thesis; the thesis contains other material, too.
|
||||
The Arabic implementation was started by Ali El Dada in his Master's thesis,
|
||||
but the thesis does not cover the whole API. The realistic amount of work is
|
||||
somewhere between 3 and 8 person months,
|
||||
but this is very much language-dependent.
|
||||
Dutch, for instance, can profit from previous implementations of German and
|
||||
Scandinavian languages, and will probably require less work.
|
||||
Latvian and Lithuanian are the first languages of the Baltic family and
|
||||
will probably require more work.
|
||||
|
||||
In any case, the proposed allocation of work power is 2 participants per
|
||||
language. They will do 1 months' worth of home work, followed
|
||||
by 2 weeks of summer school, followed by 4 months work at home.
|
||||
Who are these participants?
|
||||
|
||||
|
||||
===Selecting participants===
|
||||
|
||||
Persons interested in participating in the Summer School should sign up in
|
||||
the **Google Group** of the course,
|
||||
|
||||
[``groups.google.com/group/gf-resource-school-2009/`` http://groups.google.com/group/gf-resource-school-2009/]
|
||||
|
||||
The registration deadline is 15 June 2009.
|
||||
|
||||
Notice: you can sign up in the Google
|
||||
group even if you are not planning to attend the summer school, but are
|
||||
just interested in the topic. There will be a separate registration to the
|
||||
school itself later.
|
||||
|
||||
The participants are recommended to learn GF in advance, by self-study from the
|
||||
[tutorial http://digitalgrammars.com/gf/doc/gf-tutorial.html].
|
||||
This should take a couple of weeks. An **on-line course** will be
|
||||
arranged on 20-29 April to help in getting started with GF.
|
||||
|
||||
At the end of the on-line course, a **programming assignment** will be published.
|
||||
This assignment will test skills required in resource grammar programming.
|
||||
Work on the assignment will take a couple of weeks.
|
||||
Those who are interested in getting a travel grant will submit
|
||||
their sample resource grammar fragment
|
||||
to the Summer School Committee by 12 May.
|
||||
The Committee then decides who is given a travel grant of up to 1000 EUR.
|
||||
|
||||
Notice: you can participate in the summer school without following the on-line
|
||||
course or participating in the contest. These things are required only if you
|
||||
want a travel grant. If requested by enough participants, the lectures of
|
||||
the on-line course will be repeated in the beginning of the summer school.
|
||||
|
||||
The summer school itself is devoted to working on resource grammars.
|
||||
In addition to grammar writing itself, testing and evaluation is
|
||||
performed. One way to do this is via adding new languages
|
||||
to resource grammar applications - in particular, to the WebALT mathematical
|
||||
exercise translator.
|
||||
|
||||
The resource grammars are expected to be completed by December 2009. They will
|
||||
be published at the GF website and licensed under LGPL.
|
||||
|
||||
The participants are encouraged to contact each other and even work in groups.
|
||||
|
||||
|
||||
|
||||
===Who is qualified===
|
||||
|
||||
Writing a resource grammar implementation requires good general programming
|
||||
skills, and a good explicit knowledge of the grammar of the target language.
|
||||
A typical participant could be
|
||||
- native or fluent speaker of the target language
|
||||
- interested in languages on the theoretical level, and preferably familiar
|
||||
with many languages (to be able to think about them on an abstract level)
|
||||
- familiar with functional programming languages such as ML or Haskell
|
||||
(GF itself is a language similar to these)
|
||||
- on Master's or PhD level in linguistics, computer science, or mathematics
|
||||
|
||||
|
||||
But it is the quality of the assignment that is assessed, not any formal
|
||||
requirements. The "typical participant" was described to give an idea of
|
||||
who is likely to succeed in this.
|
||||
|
||||
|
||||
===Costs===
|
||||
|
||||
The summer school is free of charge.
|
||||
|
||||
Some travel grants are given, on the basis of a programming contest,
|
||||
to cover travel and accommodation costs up to 1000 EUR
|
||||
per person.
|
||||
|
||||
The number of grants will be decided during Spring 2009, and the grant
|
||||
holders will be notified before the beginning of June.
|
||||
|
||||
Special terms will apply to students in
|
||||
[GSLT http://www.gslt.hum.gu.se/] and
|
||||
[NGSLT http://ngslt.org/].
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
===Teachers===
|
||||
|
||||
A list of teachers will be published here later. Some of the local teachers
|
||||
probably involved are the following:
|
||||
- Krasimir Angelov
|
||||
- Robin Cooper
|
||||
- Håkan Burden
|
||||
- Markus Forsberg
|
||||
- Harald Hammarström
|
||||
- Peter Ljunglöf
|
||||
- Aarne Ranta
|
||||
|
||||
|
||||
More teachers are welcome! If you are interested, please contact us so that
|
||||
we can discuss your involvement and travel arrangements.
|
||||
|
||||
In addition to teachers, we will look for consultants who can help to assess
|
||||
the results for each language. Please contact us!
|
||||
|
||||
|
||||
|
||||
===The Summer School Committee===
|
||||
|
||||
This committee consists of a number of teachers and informants,
|
||||
who will select the participants. It will be selected by April 2009.
|
||||
|
||||
|
||||
===Time and Place===
|
||||
|
||||
The summer school will
|
||||
be organized at the campus of Chalmers University of Technology in Gothenburg,
|
||||
Sweden, on 17-28 August 2009.
|
||||
|
||||
Time schedule:
|
||||
- February: announcement of summer school
|
||||
- 20-29 April: on-line course
|
||||
- 12 May: submission deadline for assignment work
|
||||
- 31 May: review of assignments, notifications of acceptance
|
||||
- 15 June: **registration deadline**
|
||||
- 17-28 August: Summer School
|
||||
- September-December: homework on resource grammars
|
||||
- December: release of the extended Resource Grammar Library
|
||||
|
||||
|
||||
===Dissemination and intellectual property===
|
||||
|
||||
The new resource grammars will be released under the LGPL just like
|
||||
the current resource grammars,
|
||||
with the copyright held by respective authors.
|
||||
|
||||
The grammars will be distributed via the GF web site.
|
||||
|
||||
|
||||
|
||||
==Why I should participate==
|
||||
|
||||
Seven reasons:
|
||||
+ participation in a pioneering language technology work in an
|
||||
enthusiastic atmosphere
|
||||
+ work and fun with people from all over Europe and the world
|
||||
+ job opportunities and business ideas
|
||||
+ credits: the school project will be established as a course at Chalmers worth
|
||||
7.5 or 15 ECTS points per person, depending on the work accomplished; also
|
||||
extensions to Master's thesis will be considered (special credit arrangements
|
||||
for [GSLT http://www.gslt.hum.gu.se/] and [NGSLT http://ngslt.org/])
|
||||
+ merits: the resulting grammar can easily lead to a published paper (see below)
|
||||
+ contribution to the multilingual and multicultural development of Europe and the
|
||||
world
|
||||
+ free trip and stay in Gothenburg (for travel grant students)
|
||||
|
||||
|
||||
==More information==
|
||||
|
||||
[Course Google Group http://groups.google.com/group/gf-resource-school-2009/]
|
||||
|
||||
[GF web page http://digitalgrammars.com/gf/]
|
||||
|
||||
[GF tutorial http://digitalgrammars.com/gf/doc/gf-tutorial.html]
|
||||
|
||||
[GF resource synopsis http://digitalgrammars.com/gf/lib/resource/doc/synopsis.html]
|
||||
|
||||
[Resource-HOWTO document http://digitalgrammars.com/gf/doc/Resource-HOWTO.html]
|
||||
|
||||
|
||||
===Contact===
|
||||
|
||||
Håkan Burden: burden at chalmers se
|
||||
|
||||
Aarne Ranta: aarne at chalmers se
|
||||
|
||||
|
||||
|
||||
===Selected publications from earlier resource grammar projects===
|
||||
|
||||
K. Angelov.
|
||||
Type-Theoretical Bulgarian Grammar.
|
||||
In B. Nordström and A. Ranta (eds),
|
||||
//Advances in Natural Language Processing (GoTAL 2008)//,
|
||||
LNCS/LNAI 5221, Springer,
|
||||
2008.
|
||||
|
||||
B. Bringert.
|
||||
//Programming Language Techniques for Natural Language Applications//.
|
||||
Phd thesis, Computer Science, University of Gothenburg,
|
||||
2008.
|
||||
|
||||
A. El Dada and A. Ranta.
|
||||
Implementing an Open Source Arabic Resource Grammar in GF.
|
||||
In M. Mughazy (ed),
|
||||
//Perspectives on Arabic Linguistics XX. Papers from the Twentieth Annual Symposium on Arabic Linguistics, Kalamazoo, March 26//
|
||||
John Benjamins Publishing Company.
|
||||
2007.
|
||||
|
||||
A. El Dada.
|
||||
Implementation of the Arabic Numerals and their Syntax in GF.
|
||||
Computational Approaches to Semitic Languages: Common Issues and Resources,
|
||||
ACL-2007 Workshop,
|
||||
June 28, 2007, Prague.
|
||||
2007.
|
||||
|
||||
H. Hammarström and A. Ranta.
|
||||
Cardinal Numerals Revisited in GF.
|
||||
//Workshop on Numerals in the World's Languages//.
|
||||
Dept. of Linguistics Max Planck Institute for Evolutionary Anthropology, Leipzig,
|
||||
2004.
|
||||
|
||||
M. Humayoun, H. Hammarström, and A. Ranta.
|
||||
Urdu Morphology, Orthography and Lexicon Extraction.
|
||||
//CAASL-2: The Second Workshop on Computational Approaches to Arabic Script-based Languages//,
|
||||
July 21-22, 2007, LSA 2007 Linguistic Institute, Stanford University.
|
||||
2007.
|
||||
|
||||
K. Johannisson.
|
||||
//Formal and Informal Software Specifications.//
|
||||
Phd thesis, Computer Science, University of Gothenburg,
|
||||
2005.
|
||||
|
||||
J. Khegai.
|
||||
GF parallel resource grammars and Russian.
|
||||
In proceedings of ACL2006
|
||||
(The joint conference of the International Committee on Computational
|
||||
Linguistics and the Association for Computational Linguistics) (pp. 475-482),
|
||||
Sydney, Australia, July 2006.
|
||||
|
||||
J. Khegai.
|
||||
//Language engineering in Grammatical Framework (GF)//.
|
||||
Phd thesis, Computer Science, Chalmers University of Technology,
|
||||
2006.
|
||||
|
||||
W. Ng'ang'a.
|
||||
Multilingual content development for eLearning in Africa.
|
||||
eLearning Africa: 1st Pan-African Conference on ICT for Development,
|
||||
Education and Training. 24-26 May 2006, Addis Ababa, Ethiopia.
|
||||
2006.
|
||||
|
||||
N. Perera and A. Ranta.
|
||||
Dialogue System Localization with the GF Resource Grammar Library.
|
||||
//SPEECHGRAM 2007: ACL Workshop on Grammar-Based Approaches to Spoken Language Processing//,
|
||||
June 29, 2007, Prague.
|
||||
2007.
|
||||
|
||||
A. Ranta.
|
||||
Modular Grammar Engineering in GF.
|
||||
//Research on Language and Computation//,
|
||||
5:133-158, 2007.
|
||||
|
||||
A. Ranta.
|
||||
How predictable is Finnish morphology? An experiment on lexicon construction.
|
||||
In J. Nivre, M. Dahllöf and B. Megyesi (eds),
|
||||
//Resourceful Language Technology: Festschrift in Honor of Anna Sågvall Hein//,
|
||||
University of Uppsala,
|
||||
2008.
|
||||
|
||||
A. Ranta. Grammars as Software Libraries.
|
||||
To appear in
|
||||
Y. Bertot, G. Huet, J-J. Lévy, and G. Plotkin (eds.),
|
||||
//From Semantics to Computer Science//,
|
||||
Cambridge University Press, Cambridge, 2009.
|
||||
|
||||
A. Ranta and K. Angelov.
|
||||
Implementing Controlled Languages in GF.
|
||||
To appear in the proceedings of //CNL 2009//.
|
||||
|
||||
@@ -1,73 +0,0 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
||||
<HTML>
|
||||
<HEAD>
|
||||
<META NAME="generator" CONTENT="http://txt2tags.sf.net">
|
||||
<TITLE>GF 3.0</TITLE>
|
||||
</HEAD><BODY BGCOLOR="white" TEXT="black">
|
||||
<P ALIGN="center"><CENTER><H1>GF 3.0</H1>
|
||||
<FONT SIZE="4">
|
||||
<I>Krasimir Angelov, Björn Bringert, and Aarne Ranta</I><BR>
|
||||
Beta release, 27 June 2008
|
||||
</FONT></CENTER>
|
||||
|
||||
<P>
|
||||
GF Version 3.0 is a major revision of GF. The source language is a superset of the
|
||||
language in 2.9, which means backward compatibility. But the target languages, the
|
||||
compiler implementation, and the functionalities (e.g. the shell) have undergone
|
||||
radical changes.
|
||||
</P>
|
||||
<H2>New features</H2>
|
||||
<P>
|
||||
Here is a summary of the main novelties visible to the user:
|
||||
</P>
|
||||
<UL>
|
||||
<LI><B>Size</B>: the source code and the executable binary size have gone
|
||||
down to about the half of 2.9.
|
||||
<LI><B>Portability</B>: the new back end format PGF (Portable Grammar Format) is
|
||||
much simpler than the old GFC format, and therefore easier to port to new
|
||||
platforms.
|
||||
<LI><B>Multilingual web page support</B>: as an example of portability, GF 3.0 provides a
|
||||
compiler from PGF to JavaScript. There are also JavaScript libraries for creating
|
||||
translators and syntax editors as client-side web applications.
|
||||
<LI><B>Incremental parsing</B>: there is a possibility of word completion when
|
||||
input strings are sent to the parser.
|
||||
<LI><B>Application programmer's interfaces</B>: both source-GF and PGF formats,
|
||||
the shell, and the compiler are accessible via high-level APIs.
|
||||
<LI><B>Resource library version 1.4</B>: more coverage, more languages; some of
|
||||
the new GF language features are exploited.
|
||||
<LI><B>Uniform character encoding</B>: UTF8 in generated files, user-definable in
|
||||
source files
|
||||
</UL>
|
||||
|
||||
<H2>Non-supported features</H2>
|
||||
<P>
|
||||
There are some features of GF 2.9 that will <I>not</I> work in the 3.0 beta release.
|
||||
</P>
|
||||
<UL>
|
||||
<LI>Java Editor GUI: we now see the JavaScript editor as the main form of
|
||||
syntax editing.
|
||||
<LI>Pre-module multi-file grammar format: the grammar format of GF before version 2.0
|
||||
is still not yet supported.
|
||||
<LI>Context-free and EBNF input grammar formats.
|
||||
<LI>Probabilistic GF grammars.
|
||||
<LI>Some output formats: LBNF.
|
||||
<LI>Some GF shell commands: while the main ones will be supported with their familiar
|
||||
syntax and options, some old commands have not been included. The GF shell
|
||||
command <CODE>help -changes</CODE> gives the actual list.
|
||||
</UL>
|
||||
|
||||
<P>
|
||||
Users who want to have these features are welcome to contact us,
|
||||
and even more welcome to contribute code that restores them!
|
||||
</P>
|
||||
<H2>GF language extensions</H2>
|
||||
<P>
|
||||
Operations for defining patterns.
|
||||
</P>
|
||||
<P>
|
||||
Inheritance of overload groups.
|
||||
</P>
|
||||
|
||||
<!-- html code generated by txt2tags 2.4 (http://txt2tags.sf.net) -->
|
||||
<!-- cmdline: txt2tags -thtml doc/gf3-release.txt -->
|
||||
</BODY></HTML>
|
||||
@@ -1,58 +0,0 @@
|
||||
GF 3.0
|
||||
Krasimir Angelov, Björn Bringert, and Aarne Ranta
|
||||
Beta release, 27 June 2008
|
||||
|
||||
|
||||
GF Version 3.0 is a major revision of GF. The source language is a superset of the
|
||||
language in 2.9, which means backward compatibility. But the target languages, the
|
||||
compiler implementation, and the functionalities (e.g. the shell) have undergone
|
||||
radical changes.
|
||||
|
||||
|
||||
==New features==
|
||||
|
||||
Here is a summary of the main novelties visible to the user:
|
||||
- **Size**: the source code and the executable binary size have gone
|
||||
down to about the half of 2.9.
|
||||
- **Portability**: the new back end format PGF (Portable Grammar Format) is
|
||||
much simpler than the old GFC format, and therefore easier to port to new
|
||||
platforms.
|
||||
- **Multilingual web page support**: as an example of portability, GF 3.0 provides a
|
||||
compiler from PGF to JavaScript. There are also JavaScript libraries for creating
|
||||
translators and syntax editors as client-side web applications.
|
||||
- **Incremental parsing**: there is a possibility of word completion when
|
||||
input strings are sent to the parser.
|
||||
- **Application programmer's interfaces**: both source-GF and PGF formats,
|
||||
the shell, and the compiler are accessible via high-level APIs.
|
||||
- **Resource library version 1.4**: more coverage, more languages; some of
|
||||
the new GF language features are exploited.
|
||||
- **Uniform character encoding**: UTF8 in generated files, user-definable in
|
||||
source files
|
||||
|
||||
|
||||
==Non-supported features==
|
||||
|
||||
There are some features of GF 2.9 that will //not// work in the 3.0 beta release.
|
||||
- Java Editor GUI: we now see the JavaScript editor as the main form of
|
||||
syntax editing.
|
||||
- Pre-module multi-file grammar format: the grammar format of GF before version 2.0
|
||||
is still not yet supported.
|
||||
- Context-free and EBNF input grammar formats.
|
||||
- Probabilistic GF grammars.
|
||||
- Some output formats: LBNF.
|
||||
- Some GF shell commands: while the main ones will be supported with their familiar
|
||||
syntax and options, some old commands have not been included. The GF shell
|
||||
command ``help -changes`` gives the actual list.
|
||||
|
||||
|
||||
Users who want to have these features are welcome to contact us,
|
||||
and even more welcome to contribute code that restores them!
|
||||
|
||||
|
||||
==GF language extensions==
|
||||
|
||||
Operations for defining patterns.
|
||||
|
||||
Inheritance of overload groups.
|
||||
|
||||
|
||||
161
doc/index.html
@@ -13,28 +13,20 @@
|
||||
<h1>Grammatical Framework Documents</h1>
|
||||
</center>
|
||||
|
||||
<b>Top-3 documents</b>:
|
||||
|
||||
<a href="gf-tutorial.html">Tutorial</a>
|
||||
|
||||
|
|
||||
|
||||
<a href="gf-refman.html">ReferenceManual</a>
|
||||
|
||||
|
|
||||
|
||||
<a href="../lib/resource/doc/synopsis.html">LibrarySynopsis</a>
|
||||
|
||||
|
||||
|
||||
<h2>Tutorials</h2>
|
||||
<b>Top-5 documents</b>:
|
||||
|
||||
<a href="gf-quickstart.html">Quick start instruction</a>.
|
||||
|
||||
<p>
|
||||
|
||||
<a href="gf-tutorial.html">GF Tutorial</a>,
|
||||
Now up-to-date for GF version 2.9. Covers all of GF.
|
||||
<a href="tutorial/gf-tutorial.html">Old Tutorial</a>, application-oriented.
|
||||
|
||||
<a href="gf-lrec-2010.pdf">New Tutorial</a>, linguistics-oriented.
|
||||
|
||||
<a href="gf-refman.html">ReferenceManual</a>.
|
||||
|
||||
<a href="../lib/resource/doc/synopsis.html">LibrarySynopsis</a>.
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -49,144 +41,13 @@ in a summary format.
|
||||
<a href="gf-refman.html">GF Reference Manual</a>. A full-scale reference
|
||||
manual of the GF language.
|
||||
|
||||
<p>
|
||||
|
||||
<a href="gf-manual.html">
|
||||
User Manual</a> explaining the GF user interfaces and command language (slightly
|
||||
outdated).
|
||||
|
||||
<p>
|
||||
|
||||
<a href="../../GF2.0/doc/javaGUImanual/javaGUImanual.htm">Editor User Manual</a>
|
||||
on editing in the Java interface.
|
||||
|
||||
<p>
|
||||
|
||||
<a href="gf-compiler.png">Chart of GF grammar compiler phases</a>.
|
||||
|
||||
|
||||
|
||||
<h2>Grammar library documentation</h2>
|
||||
|
||||
<a href="gf-tutorial.html#chapfive">Resource Grammar Tutorial Chapter</a>.
|
||||
|
||||
<p>
|
||||
|
||||
<a href="../lib/resource/doc/synopsis.html">Resource Grammar Synopsis</a>
|
||||
for library users. With APIs and use examples.
|
||||
|
||||
<p>
|
||||
|
||||
<a href="../lib/resource/doc/Resource-HOWTO.html">
|
||||
Resource Grammar HOWTO</a>
|
||||
for library authors.
|
||||
|
||||
|
||||
|
||||
|
||||
<h2>Embedding GF grammars in computer programs</h2>
|
||||
|
||||
<a href="gf-tutorial.html#chapeight">Embedded Grammar Tutorial Chapter</a>.
|
||||
|
||||
<p>
|
||||
|
||||
<a href="http://www.cs.chalmers.se/~bringert/gf/gf-java.html">
|
||||
Embedded GF Interpreter</a> manual for using GF grammars in Java programs.
|
||||
|
||||
<p>
|
||||
|
||||
<a href="http://www.cs.chalmers.se/~aarne/GF/src/GF/GFCC/API.hs">
|
||||
Embedded GF API</a> for using GF grammars in Haskell programs.
|
||||
|
||||
<p>
|
||||
|
||||
<a href="http://www.ling.gu.se/~peb/index.cgi/Software">
|
||||
MCFG/GF library for Prolog</a>,
|
||||
for using GF grammars in Prolog programs.
|
||||
|
||||
|
||||
|
||||
<h2>Theoretical studies</h2>
|
||||
|
||||
<a href="http://www.cs.chalmers.se/~aarne/articles/gf-jfp.ps.gz">
|
||||
Grammatical Framework: A Type-Theoretical
|
||||
Grammar Formalism</a> (ps.gz). Theoretical paper on GF by A. Ranta. A later
|
||||
version appeared
|
||||
in <i>The Journal of Functional Programming</i>, vol. 14:2. 2004, pp. 145-189.
|
||||
The standard reference on GF.
|
||||
|
||||
<p>
|
||||
|
||||
<a href="http://www.ling.gu.se/~peb/pubs/Ljunglof-2004a.pdf">
|
||||
Expressivity and Complexity of the Grammatical Framework</a>,
|
||||
PhD Thesis by
|
||||
<a href="http://www.ling.gu.se/~peb">Peter Ljunglöf</a>.
|
||||
|
||||
|
||||
|
||||
<h2>Introductory talks</h2>
|
||||
|
||||
<a href="http://www.cs.chalmers.se/~aarne/GF2.0/doc/short/gf-short.html">
|
||||
GF in 25 Minutes</a> - overview for computer science audience.
|
||||
|
||||
<p>
|
||||
|
||||
|
||||
<a href="http://www.cs.chalmers.se/~aarne/slides/gf-rocquencourt.pdf">
|
||||
Slides on GF theory and implementation</a> given
|
||||
at INRIA Rocquencourt in December 2003.
|
||||
|
||||
<p>
|
||||
|
||||
<a
|
||||
href="http://www.cs.chalmers.se/~aarne/slides/webalt-2005.pdf">
|
||||
Slides on example-based grammar writing</a> and a short introduction
|
||||
to GF grammars.
|
||||
|
||||
<p>
|
||||
|
||||
<a
|
||||
href="http://www.cs.chalmers.se/~aarne/course-langtech/lectures/lectures.html">
|
||||
Course notes on Natural Language Technology</a>, includes
|
||||
slides on using GF.
|
||||
|
||||
|
||||
|
||||
<h2>Examples and applications</h2>
|
||||
|
||||
<a href="http://www.cs.chalmers.se/~krijo/thesis/thesisA4.pdf">
|
||||
Formal and Informal Software Specifications</a>,
|
||||
PhD Thesis by
|
||||
<a href="http://www.cs.chalmers.se/~krijo">Kristofer Johannisson</a>.
|
||||
|
||||
|
||||
<p>
|
||||
|
||||
<a href="http://www.dtek.chalmers.se/~d00bring/publ/exjobb/embedded-grammars.pdf">
|
||||
Embedded grammars</a>,
|
||||
Master's thesis by
|
||||
<a href="http://www.cs.chalmers.se/~bringert/">Björn Bringert</a>
|
||||
|
||||
<p>
|
||||
|
||||
<a
|
||||
href="http://www.cs.chalmers.se/~bringert/misc/tramdemo.avi">Demo film</a>
|
||||
of a multimodal dialogue system built with embedded grammars.
|
||||
|
||||
|
||||
<p>
|
||||
|
||||
<a href="gfcc.pdf">
|
||||
GFCC</a> (pdf):
|
||||
report on a compiler from a fragment of C to JVM, written in GF.
|
||||
|
||||
|
||||
|
||||
<h2>More</h2>
|
||||
<h2>Publications</h2>
|
||||
|
||||
<a href="gf-bibliography.html">
|
||||
Bibliography</a>:
|
||||
more publications on GF, as well as background literature.
|
||||
Bibliography</a>: more publications on GF, as well as background literature.
|
||||
|
||||
|
||||
</body></html>
|
||||
|
||||
@@ -1,106 +0,0 @@
|
||||
graph{
|
||||
|
||||
size = "8,8" ;
|
||||
|
||||
overlap = scale ;
|
||||
|
||||
"Abs" [label = "Abstract Syntax", style = "solid", shape = "rectangle"] ;
|
||||
|
||||
"1" [label = "Bulgarian", style = "solid", shape = "ellipse", color = "green"] ;
|
||||
"1" -- "Abs" [style = "solid"];
|
||||
|
||||
"2" [label = "Czech", style = "solid", shape = "ellipse", color = "red"] ;
|
||||
"2" -- "Abs" [style = "solid"];
|
||||
|
||||
"3" [label = "Danish", style = "solid", shape = "ellipse", color = "green"] ;
|
||||
"3" -- "Abs" [style = "solid"];
|
||||
|
||||
"4" [label = "German", style = "solid", shape = "ellipse", color = "green"] ;
|
||||
"4" -- "Abs" [style = "solid"];
|
||||
|
||||
"5" [label = "Estonian", style = "solid", shape = "ellipse", color = "red"] ;
|
||||
"5" -- "Abs" [style = "solid"];
|
||||
|
||||
"6" [label = "Greek", style = "solid", shape = "ellipse", color = "red"] ;
|
||||
"6" -- "Abs" [style = "solid"];
|
||||
|
||||
"7" [label = "English", style = "solid", shape = "ellipse", color = "green"] ;
|
||||
"7" -- "Abs" [style = "solid"];
|
||||
|
||||
"8" [label = "Spanish", style = "solid", shape = "ellipse", color = "green"] ;
|
||||
"8" -- "Abs" [style = "solid"];
|
||||
|
||||
"9" [label = "French", style = "solid", shape = "ellipse", color = "green"] ;
|
||||
"9" -- "Abs" [style = "solid"];
|
||||
|
||||
"10" [label = "Italian", style = "solid", shape = "ellipse", color = "green"] ;
|
||||
"10" -- "Abs" [style = "solid"];
|
||||
|
||||
"11" [label = "Latvian", style = "solid", shape = "ellipse", color = "red"] ;
|
||||
"11" -- "Abs" [style = "solid"];
|
||||
|
||||
"12" [label = "Lithuanian", style = "solid", shape = "ellipse", color = "red"] ;
|
||||
"Abs" -- "12" [style = "solid"];
|
||||
|
||||
"13" [label = "Irish", style = "solid", shape = "ellipse", color = "red"] ;
|
||||
"Abs" -- "13" [style = "solid"];
|
||||
|
||||
"14" [label = "Hungarian", style = "solid", shape = "ellipse", color = "red"] ;
|
||||
"Abs" -- "14" [style = "solid"];
|
||||
|
||||
"15" [label = "Maltese", style = "solid", shape = "ellipse", color = "red"] ;
|
||||
"Abs" -- "15" [style = "solid"];
|
||||
|
||||
"16" [label = "Dutch", style = "solid", shape = "ellipse", color = "red"] ;
|
||||
"Abs" -- "16" [style = "solid"];
|
||||
|
||||
"17" [label = "Polish", style = "solid", shape = "ellipse", color = "orange"] ;
|
||||
"Abs" -- "17" [style = "solid"];
|
||||
|
||||
"18" [label = "Portuguese", style = "solid", shape = "ellipse", color = "red"] ;
|
||||
"Abs" -- "18" [style = "solid"];
|
||||
|
||||
"19" [label = "Slovak", style = "solid", shape = "ellipse", color = "red"] ;
|
||||
"Abs" -- "19" [style = "solid"];
|
||||
|
||||
"20" [label = "Slovene", style = "solid", shape = "ellipse", color = "red"] ;
|
||||
"Abs" -- "20" [style = "solid"];
|
||||
|
||||
"21" [label = "Romanian", style = "solid", shape = "ellipse", color = "orange"] ;
|
||||
"Abs" -- "21" [style = "solid"];
|
||||
|
||||
"22" [label = "Finnish", style = "solid", shape = "ellipse", color = "green"] ;
|
||||
"Abs" -- "22" [style = "solid"];
|
||||
|
||||
"23" [label = "Swedish", style = "solid", shape = "ellipse", color = "green"] ;
|
||||
"Abs" -- "23" [style = "solid"];
|
||||
|
||||
"24" [label = "Catalan", style = "dotted", shape = "ellipse", color = "green"] ;
|
||||
"Abs" -- "24" [style = "solid"];
|
||||
|
||||
"25" [label = "Norwegian", style = "dotted", shape = "ellipse", color = "green"] ;
|
||||
"Abs" -- "25" [style = "solid"];
|
||||
|
||||
"26" [label = "Russian", style = "dotted", shape = "ellipse", color = "green"] ;
|
||||
"Abs" -- "26" [style = "solid"];
|
||||
|
||||
"27" [label = "Interlingua", style = "dotted", shape = "ellipse", color = "green"] ;
|
||||
"Abs" -- "27" [style = "solid"];
|
||||
|
||||
"28" [label = "Latin", style = "dotted", shape = "ellipse", color = "orange"] ;
|
||||
"Abs" -- "28" [style = "solid"];
|
||||
"29" [label = "Turkish", style = "dotted", shape = "ellipse", color = "orange"] ;
|
||||
"Abs" -- "29" [style = "solid"];
|
||||
"30" [label = "Hindi", style = "dotted", shape = "ellipse", color = "orange"] ;
|
||||
"Abs" -- "30" [style = "solid"];
|
||||
"31" [label = "Thai", style = "dotted", shape = "ellipse", color = "orange"] ;
|
||||
"Abs" -- "31" [style = "solid"];
|
||||
"32" [label = "Urdu", style = "dotted", shape = "ellipse", color = "orange"] ;
|
||||
"Abs" -- "32" [style = "solid"];
|
||||
"33" [label = "Telugu", style = "dotted", shape = "ellipse", color = "red"] ;
|
||||
"Abs" -- "33" [style = "solid"];
|
||||
"34" [label = "Arabic", style = "dotted", shape = "ellipse", color = "orange"] ;
|
||||
"Abs" -- "34" [style = "solid"];
|
||||
|
||||
|
||||
}
|
||||
|
Before Width: | Height: | Size: 129 KiB |
|
Before Width: | Height: | Size: 439 KiB |
|
Before Width: | Height: | Size: 1.8 MiB |
|
Before Width: | Height: | Size: 65 KiB After Width: | Height: | Size: 65 KiB |
|
Before Width: | Height: | Size: 4.1 KiB After Width: | Height: | Size: 4.1 KiB |
|
Before Width: | Height: | Size: 19 KiB After Width: | Height: | Size: 19 KiB |
|
Before Width: | Height: | Size: 96 KiB After Width: | Height: | Size: 96 KiB |
|
Before Width: | Height: | Size: 2.0 KiB After Width: | Height: | Size: 2.0 KiB |
@@ -1,6 +1,6 @@
|
||||
Grammatical Framework Tutorial
|
||||
Aarne Ranta
|
||||
Version 3.1.2, November 2008
|
||||
December 2010 (November 2008)
|
||||
|
||||
|
||||
% NOTE: this is a txt2tags file.
|
||||
@@ -626,7 +626,7 @@ You can chop this tutorial into a set of slides by the command
|
||||
```
|
||||
where the program ``htmls`` is distributed with GF (see below), in
|
||||
|
||||
[``GF/src/tools/Htmls.hs`` http://digitalgrammars.com/gf/src/tools/Htmls.hs]
|
||||
[``GF/src/tools/Htmls.hs`` http://grammaticalframework.org/src/tools/Htmls.hs]
|
||||
|
||||
The slides will appear as a set of files beginning with ``01-gf-tutorial.htmls``.
|
||||
|
||||
@@ -700,7 +700,7 @@ In general, a GF grammar is **multilingual**:
|
||||
|
||||
Open-source free software, downloaded via the GF Homepage:
|
||||
|
||||
[``digitalgrammars.com/gf`` http://digitalgrammars.com/gf/]
|
||||
[``grammaticalframework.org`` http://grammaticalframework.org/]
|
||||
|
||||
There you find
|
||||
- binaries for Linux, Mac OS X, and Windows
|
||||
@@ -709,11 +709,11 @@ There you find
|
||||
|
||||
|
||||
Many examples in this tutorial are
|
||||
[online http://digitalgrammars.com/gf/examples/tutorial].
|
||||
[online http://grammaticalframework.org/examples/tutorial].
|
||||
|
||||
Normally you don't have to compile GF yourself.
|
||||
But, if you do want to compile GF from source follow the
|
||||
instructions in the [Developers Guide gf-developers.html].
|
||||
instructions in the [Developers Guide ../gf-developers.html].
|
||||
|
||||
|
||||
#NEW
|
||||
@@ -2453,7 +2453,7 @@ can be used to read a text and return for each word its analyses
|
||||
```
|
||||
The command ``morpho_quiz = mq`` generates inflection exercises.
|
||||
```
|
||||
% gf -path=alltenses:prelude $GF_LIB_PATH/alltenses/IrregFre.gfc
|
||||
% gf -path=alltenses:prelude $GF_LIB_PATH/alltenses/IrregFre.gfo
|
||||
|
||||
> morpho_quiz -cat=V
|
||||
|
||||
@@ -2970,7 +2970,7 @@ Language-specific and language-independent parts - roughly,
|
||||
|
||||
Full API documentation on-line: the **resource synopsis**,
|
||||
|
||||
[``digitalgrammars.com/gf/lib/resource/doc/synopsis.html`` http://digitalgrammars.com/gf/lib/resource/doc/synopsis.html]
|
||||
[``grammaticalframework.org/lib/resource/doc/synopsis.html`` http://grammaticalframework.org/lib/doc/synopsis.html]
|
||||
|
||||
|
||||
#NEW
|
||||
@@ -4530,10 +4530,10 @@ This facility is based on several components:
|
||||
|
||||
The portable format is called PGF, "Portable Grammar Format".
|
||||
|
||||
This format is produced by the GF batch compiler ``gfc``,
|
||||
This format is produced by the GF batch compiler ``gf``,
|
||||
executable from the operative system shell:
|
||||
```
|
||||
% gfc --make SOURCE.gf
|
||||
% gf --make SOURCE.gf
|
||||
```
|
||||
PGF is the recommended format in
|
||||
which final grammar products are distributed, because they
|
||||
@@ -4605,12 +4605,12 @@ For this, you need the Haskell compiler [GHC http://www.haskell.org/ghc].
|
||||
|
||||
#NEW
|
||||
|
||||
===Producing GFCC for the translator===
|
||||
===Producing PGF for the translator===
|
||||
|
||||
Then produce a GFCC file. For instance, the ``Food`` grammar set can be
|
||||
Then produce a PGF file. For instance, the ``Food`` grammar set can be
|
||||
compiled as follows:
|
||||
```
|
||||
% gfc --make FoodEng.gf FoodIta.gf
|
||||
% gf --make FoodEng.gf FoodIta.gf
|
||||
```
|
||||
This produces the file ``Food.pgf`` (its name comes from the abstract syntax).
|
||||
|
||||
@@ -4714,11 +4714,11 @@ abstract Query = {
|
||||
To make it easy to define a transfer function, we export the
|
||||
abstract syntax to a system of Haskell datatypes:
|
||||
```
|
||||
% gfc --output-format=haskell Query.pgf
|
||||
% gf --output-format=haskell Query.pgf
|
||||
```
|
||||
It is also possible to produce the Haskell file together with GFCC, by
|
||||
It is also possible to produce the Haskell file together with PGF, by
|
||||
```
|
||||
% gfc --make --output-format=haskell QueryEng.gf
|
||||
% gf --make --output-format=haskell QueryEng.gf
|
||||
```
|
||||
The result is a file named ``Query.hs``, containing a
|
||||
module named ``Query``.
|
||||
@@ -4871,7 +4871,7 @@ translate tr gr s = case parseAllLang gr (startCat gr) s of
|
||||
To automate the production of the system, we write a ``Makefile`` as follows:
|
||||
```
|
||||
all:
|
||||
gfc --make --output-format=haskell QueryEng
|
||||
gf --make --output-format=haskell QueryEng
|
||||
ghc --make -o ./math TransferLoop.hs
|
||||
strip math
|
||||
```
|
||||
@@ -4928,7 +4928,7 @@ program compiled from GF grammars as run on an iPhone.
|
||||
JavaScript is one of the output formats of the GF batch compiler. Thus the following
|
||||
command generates a JavaScript file from two ``Food`` grammars.
|
||||
```
|
||||
% gfc --make --output-format=js FoodEng.gf FoodIta.gf
|
||||
% gf --make --output-format=js FoodEng.gf FoodIta.gf
|
||||
```
|
||||
The name of the generated file is ``Food.js``, derived from the top-most abstract
|
||||
syntax name. This file contains the multilingual grammar as a JavaScript object.
|
||||
@@ -4944,7 +4944,7 @@ some other JavaScript and HTML files; these files can be used
|
||||
as templates for building applications.
|
||||
|
||||
An example of usage is
|
||||
[``translator.html`` ../lib/javascript/translator.html],
|
||||
[``translator.html`` http://grammaticalframework.org:41296],
|
||||
which is in fact initialized with
|
||||
a pointer to the Food grammar, so that it provides translation between the English
|
||||
and Italian grammars:
|
||||
@@ -4969,12 +4969,12 @@ The standard way of using GF in speech recognition is by building
|
||||
GF supports several formats, including
|
||||
GSL, the formatused in the [Nuance speech recognizer www.nuance.com].
|
||||
|
||||
GSL is produced from GF by running ``gfc`` with the flag
|
||||
GSL is produced from GF by running ``gf`` with the flag
|
||||
``--output-format=gsl``.
|
||||
|
||||
Example: GSL generated from ``FoodsEng.gf``.
|
||||
```
|
||||
% gfc --make --output-format=gsl FoodsEng.gf
|
||||
% gf --make --output-format=gsl FoodsEng.gf
|
||||
% more FoodsEng.gsl
|
||||
|
||||
;GSL2.0
|
||||
@@ -5017,6 +5017,6 @@ Other formats available via the ``--output-format`` flag include:
|
||||
| ``slf`` | finite automaton in the HTK SLF format
|
||||
| ``slf_sub`` | finite automaton with sub-automata in HTK SLF
|
||||
|
||||
All currently available formats can be seen with ``gfc --help``.
|
||||
All currently available formats can be seen with ``gf --help``.
|
||||
|
||||
|
||||
|
Before Width: | Height: | Size: 17 KiB After Width: | Height: | Size: 17 KiB |
|
Before Width: | Height: | Size: 2.2 KiB After Width: | Height: | Size: 2.2 KiB |
46
doc/vr.html
@@ -1,46 +0,0 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
||||
<HTML>
|
||||
<HEAD>
|
||||
<META NAME="generator" CONTENT="http://txt2tags.sf.net">
|
||||
<TITLE>Library-Based Grammar Engineering</TITLE>
|
||||
</HEAD><BODY BGCOLOR="white" TEXT="black">
|
||||
<P ALIGN="center"><CENTER><H1>Library-Based Grammar Engineering</H1>
|
||||
<FONT SIZE="4">
|
||||
<I>VR Project 2006-2008</I><BR>
|
||||
</FONT></CENTER>
|
||||
|
||||
<H1>Staff</H1>
|
||||
<P>
|
||||
Lars Borin (co-leader)
|
||||
</P>
|
||||
<P>
|
||||
Robin Cooper (co-leader)
|
||||
</P>
|
||||
<P>
|
||||
Aarne Ranta (project responsible)
|
||||
</P>
|
||||
<P>
|
||||
Sibylle Schupp (co-leader)
|
||||
</P>
|
||||
<H1>Publications</H1>
|
||||
<P>
|
||||
Ali El Dada, MSc Thesis
|
||||
</P>
|
||||
<P>
|
||||
Muhammad Humayoun, MSc Thesis
|
||||
</P>
|
||||
<P>
|
||||
Janna Khegai,
|
||||
Language Engineering in GF, PhD Thesis, Chalmers. 2006.
|
||||
</P>
|
||||
<H1>Links</H1>
|
||||
<P>
|
||||
<A HREF="http://www.cs.chalmers.se/~aarne/GF/">GF</A>
|
||||
</P>
|
||||
<P>
|
||||
<A HREF="http://www.cs.chalmers.se/~markus/FM/">Functional Morphology</A>
|
||||
</P>
|
||||
|
||||
<!-- html code generated by txt2tags 2.0 (http://txt2tags.sf.net) -->
|
||||
<!-- cmdline: txt2tags -thtml vr.txt -->
|
||||
</BODY></HTML>
|
||||
32
doc/vr.txt
@@ -1,32 +0,0 @@
|
||||
Library-Based Grammar Engineering
|
||||
VR Project 2006-2008
|
||||
|
||||
|
||||
=Staff=
|
||||
|
||||
Lars Borin (co-leader)
|
||||
|
||||
Robin Cooper (co-leader)
|
||||
|
||||
Aarne Ranta (project responsible)
|
||||
|
||||
Sibylle Schupp (co-leader)
|
||||
|
||||
|
||||
|
||||
=Publications=
|
||||
|
||||
Ali El Dada, MSc Thesis
|
||||
|
||||
Muhammad Humayoun, MSc Thesis
|
||||
|
||||
Janna Khegai,
|
||||
Language Engineering in GF, PhD Thesis, Chalmers. 2006.
|
||||
|
||||
|
||||
|
||||
=Links=
|
||||
|
||||
[GF http://www.cs.chalmers.se/~aarne/GF/]
|
||||
|
||||
[Functional Morphology http://www.cs.chalmers.se/~markus/FM/]
|
||||