forked from GitHub/gf-rgl
Move everything related to synopsis into subfolder
Clean up some unused and generated files
This commit is contained in:
1698
doc/CC_eng_tha.txt
1698
doc/CC_eng_tha.txt
File diff suppressed because it is too large
Load Diff
66
doc/Makefile
66
doc/Makefile
@@ -1,66 +1,18 @@
|
||||
.PHONY: all index status synopsis abstract
|
||||
.PHONY: all status synopsis abstract
|
||||
|
||||
GFDOC=gfdoc
|
||||
S=../src
|
||||
|
||||
all: synopsis
|
||||
|
||||
GF_alltenses=$(GF_LIB_PATH)/alltenses
|
||||
GF=gf
|
||||
GFDOC=gfdoc
|
||||
status: status.html
|
||||
|
||||
index:
|
||||
txt2tags -thtml index.txt
|
||||
status:
|
||||
synopsis:
|
||||
make -C synopsis
|
||||
|
||||
status.html:
|
||||
txt2tags -thtml status.txt
|
||||
|
||||
synopsis: synopsis.html
|
||||
|
||||
S=../src
|
||||
|
||||
# List of languages extracted from languages.csv, with 'Synopsis' column == y
|
||||
LANGS=$(shell cat ../languages.csv | cut -d',' -f1,10 | grep ',y' | cut -d',' -f1)
|
||||
|
||||
# This list was constructed by observing what files MkSynopsis.hs reads
|
||||
SRC_FILES=$(S)/abstract/Common.gf $(S)/abstract/Cat.gf $(S)/api/Constructors.gf $(S)/abstract/Structural.gf $(patsubst %,$S/*/Paradigms%.gf,$(LANGS))
|
||||
|
||||
EXAMPLES_OUT=$(patsubst %,api-examples-%.txt,$(LANGS))
|
||||
INCLUDES=synopsis-intro.txt categories-intro.txt categories-imagemap.html synopsis-additional.txt synopsis-browse.txt synopsis-example.txt
|
||||
|
||||
synopsis.txt: MkSynopsis.hs MkExxTable.hs $(INCLUDES) $(EXAMPLES_OUT) $(SRC_FILES)
|
||||
runghc -i.. MkSynopsis.hs
|
||||
|
||||
TMP=tmp.html
|
||||
synopsis.html: synopsis.txt _template.html
|
||||
txt2tags --target=html --no-headers --quiet --toc --outfile=$@ --infile=$<
|
||||
pandoc \
|
||||
--from=html \
|
||||
--to=html5 \
|
||||
--standalone \
|
||||
--template=_template.html \
|
||||
--css=synopsis.css \
|
||||
--metadata='title:"GF Resource Grammar Library: Synopsis"' \
|
||||
--variable='rel-root:../..' \
|
||||
--output=$(TMP) \
|
||||
$@
|
||||
mv $(TMP) $@
|
||||
|
||||
categories.png: categories.dot
|
||||
dot -Tpng $^ > $@
|
||||
|
||||
categories-imagemap.html: categories.dot
|
||||
dot -Tcmapx $^ > $@
|
||||
|
||||
abstract:
|
||||
$(GFDOC) -txthtml $S/abstract/*.gf
|
||||
mv $S/abstract/*.html abstract
|
||||
|
||||
api-examples.gfs: api-examples.txt MkExx.hs
|
||||
runghc MkExx.hs < $< > $@
|
||||
|
||||
# Since .gfo files aren't self-contained, the dependencies given here are
|
||||
# incomplete. But I am thinking that the Try%.gfo file will always be newer
|
||||
# than any other files it depends on, so the rule will trigger when
|
||||
# needed anyway. //TH 2018-10-22
|
||||
api-examples-%.txt: $(GF_alltenses)/Try%.gfo api-examples.gfs
|
||||
GF_LIB_PATH=$(GF_LIB_PATH) $(GF) -retain -s $< <api-examples.gfs >$@
|
||||
|
||||
clean:
|
||||
rm -rf synopsis.txt api-examples.gfs $(EXAMPLES_OUT)
|
||||
|
||||
22
doc/Test.hs
22
doc/Test.hs
@@ -1,22 +0,0 @@
|
||||
import qualified Data.Map as Map
|
||||
import Data.Char
|
||||
|
||||
gold = "CC_eng_tha.txt"
|
||||
tested = "api-examples-Tha.txt"
|
||||
|
||||
main = do
|
||||
s <- readFile gold
|
||||
let corrects = Map.fromList $ exx 1 5 2 (lines s)
|
||||
-- mapM_ putStrLn $ concat [[t,s] | (t,s) <- Map.toList corrects]
|
||||
t <- readFile tested
|
||||
mapM_ (doTest corrects) (exx 18 22 1 (map (drop 4) (lines t)))
|
||||
|
||||
exx x y z ss = [(ss!!k,ss!!(k+z)) | k <- [x,y .. length ss - 2]]
|
||||
|
||||
doTest corrects (t,s) = case Map.lookup t corrects of
|
||||
Just c -> if unspace s == uncomment c then return () else mapM_ putStrLn [t,unspace s,c]
|
||||
_ -> return ()
|
||||
|
||||
unspace = filter (not . isSpace)
|
||||
uncomment = unspace . takeWhile (/= '-')
|
||||
|
||||
Binary file not shown.
|
Before Width: | Height: | Size: 120 KiB |
BIN
doc/editor.png
BIN
doc/editor.png
Binary file not shown.
|
Before Width: | Height: | Size: 20 KiB |
267
doc/index.txt
267
doc/index.txt
@@ -1,267 +0,0 @@
|
||||
GF Resource Grammar Library v. 1.2
|
||||
Author: Aarne Ranta <aarne (at) cs.chalmers.se>
|
||||
Last update: %%date(%c)
|
||||
|
||||
% NOTE: this is a txt2tags file.
|
||||
% Create an html file from this file using:
|
||||
% txt2tags --toc -thtml index.txt
|
||||
|
||||
%!target:html
|
||||
|
||||
%!postproc(html): #BCEN <center>
|
||||
%!postproc(html): #ECEN </center>
|
||||
|
||||
|
||||
#BCEN
|
||||
|
||||
[10lang-large.png]
|
||||
|
||||
#ECEN
|
||||
|
||||
|
||||
The GF Resource Grammar Library defines the basic grammar of
|
||||
ten languages:
|
||||
Danish, English, Finnish, French, German,
|
||||
Italian, Norwegian, Russian, Spanish, Swedish.
|
||||
Still incomplete implementations for Arabic and Catalan are also
|
||||
included.
|
||||
|
||||
**New** in December 2007: Browsing the library by syntax editor
|
||||
[directly on the web ../../../demos/resource-api/editor.html].
|
||||
|
||||
|
||||
|
||||
|
||||
==Authors==
|
||||
|
||||
Inger Andersson and Therese Soderberg (Spanish morphology),
|
||||
Nicolas Barth and Sylvain Pogodalla (French verb list),
|
||||
Ali El Dada (Arabic modules),
|
||||
Magda Gerritsen and Ulrich Real (Russian paradigms and lexicon),
|
||||
Janna Khegai (Russian modules),
|
||||
Bjorn Bringert (many Swadesh lexica),
|
||||
Carlos Gonzalía (Spanish cardinals),
|
||||
Harald Hammarström (German morphology),
|
||||
Patrik Jansson (Swedish cardinals),
|
||||
Andreas Priesnitz (German lexicon),
|
||||
Aarne Ranta,
|
||||
Jordi Saludes (Catalan modules),
|
||||
Henning Thielemann (German lexicon).
|
||||
|
||||
|
||||
We are grateful for contributions and
|
||||
comments to several other people who have used this and
|
||||
the previous versions of the resource library, including
|
||||
Ludmilla Bogavac,
|
||||
Ana Bove,
|
||||
David Burke,
|
||||
Lauri Carlson,
|
||||
Gloria Casanellas,
|
||||
Karin Cavallin,
|
||||
Robin Cooper,
|
||||
Hans-Joachim Daniels,
|
||||
Elisabet Engdahl,
|
||||
Markus Forsberg,
|
||||
Kristofer Johannisson,
|
||||
Anni Laine,
|
||||
Hans Leiß,
|
||||
Peter Ljunglöf,
|
||||
Saara Myllyntausta,
|
||||
Wanjiku Ng'ang'a,
|
||||
Nadine Perera,
|
||||
Jordi Saludes.
|
||||
|
||||
|
||||
==License==
|
||||
|
||||
The GF Resource Grammar Library is open-source software licensed under
|
||||
GNU Lesser General Public License (LGPL). See the file [LICENSE ../LICENSE] for more
|
||||
details.
|
||||
|
||||
|
||||
==Scope==
|
||||
|
||||
Coverage, for each language:
|
||||
- complete morphology
|
||||
- lexicon of the ca. 100 most important structural words
|
||||
- test lexicon of ca. 300 content words (rough equivalents in each language)
|
||||
- list of irregular verbs (separately for each language)
|
||||
- representative fragment of syntax (cf. CLE (Core Language Engine))
|
||||
- rather flat semantics (cf. Quasi-Logical Form of CLE)
|
||||
|
||||
|
||||
Organization:
|
||||
- top-level (API) modules
|
||||
- Ground API + special-purpose APIs
|
||||
- "school grammar" concepts rather than advanced linguistic theory
|
||||
|
||||
|
||||
Presentation:
|
||||
- tool ``gfdoc`` for generating HTML from grammars
|
||||
- example collections
|
||||
|
||||
|
||||
==Location==
|
||||
|
||||
Assuming you have installed the libraries, you will find the precompiled
|
||||
``gfc`` and ``gfr`` files directly under ``$GF_LIB_PATH``, whose default
|
||||
value is ``/usr/local/share/GF/``. The precompiled subdirectories are
|
||||
```
|
||||
alltenses
|
||||
mathematical
|
||||
multimodal
|
||||
present
|
||||
```
|
||||
Do for instance
|
||||
```
|
||||
cd $GF_LIB_PATH
|
||||
gf alltenses/langs.gfcm
|
||||
|
||||
> p -cat=S -lang=LangEng "this grammar is too big" | tb
|
||||
```
|
||||
For more details, see the [Synopsis synopsis.html].
|
||||
|
||||
|
||||
==Compilation==
|
||||
|
||||
If you want to compile the library from scratch, use ``make`` in the root of
|
||||
the source directory:
|
||||
```
|
||||
cd GF/lib/resource-1.0
|
||||
make
|
||||
```
|
||||
The ``make`` procedure does not by default make Arabic and Catalan, but you
|
||||
can uncomment the relevant lines in ``Makefile`` to compile them.
|
||||
|
||||
|
||||
==Encoding==
|
||||
|
||||
Finnish, German, Romance, and Scandinavian languages are in isolatin-1.
|
||||
|
||||
Arabic and Russian are in UTF-8.
|
||||
|
||||
English is in pure ASCII.
|
||||
|
||||
The different encodings imply, unfortunately, that it is hard to get
|
||||
a nice view of all languages simultaneously. The easiest way to achieve this is
|
||||
to use ``gfeditor``, which automatically converts grammars to UTF-8.
|
||||
|
||||
|
||||
==Using the resource as library==
|
||||
|
||||
This API is accessible by both ``present`` and ``alltenses``. The modules you most often need are
|
||||
- ``Syntax``, the interface to syntactic structures
|
||||
- ``Syntax``//L//, the implementations of ``Syntax`` for each language //L//
|
||||
- ``Paradigms``//L//, the morphological paradigms for each language //L//
|
||||
|
||||
|
||||
The [Synopsis synopsis.html] gives examples on the typical usage of these
|
||||
modules.
|
||||
|
||||
|
||||
==Using the resource as top level grammar==
|
||||
|
||||
The following modules can be used for parsing and linearization. They are accessible from both
|
||||
``present`` and ``alltenses``.
|
||||
- ``Lang``//L// for each language //L//, implementing a common abstract syntax ``Lang``
|
||||
- ``Danish``, ``English``, etc, implementing ``Lang`` with language-specific extensions
|
||||
|
||||
|
||||
In addition, there is in both ``present`` and ``alltenses`` the file
|
||||
- ``langs.gfcm``, a package with precompiled ``Lang``//L// grammars
|
||||
|
||||
|
||||
A way to test and view the resource grammar is to load ``langs.gfcm`` either into ``gfeditor``
|
||||
or into the ``gf`` shell and perform actions such as syntax editing and treebank generation.
|
||||
For instance, the command
|
||||
```
|
||||
> p -lang=LangEng -cat=S "this grammar is too big" | tb
|
||||
```
|
||||
creates a treebank entry with translations of this sentence.
|
||||
|
||||
For parsing, currently only English and the Scandinavian languages are within the limits of
|
||||
reasonable resources. For other languages //L//, parsing with ``Lang``//L// will probably eat
|
||||
up the computer resources before finishing the parser generation.
|
||||
|
||||
|
||||
|
||||
==Accessing the lower level ground API==
|
||||
|
||||
The ``Syntax`` API is implemented in terms of a bunch of ``abstract`` modules, which
|
||||
as of version 1.2 are mainly interesting for implementors of the resource.
|
||||
See the [documentation for version 1.1 index-1.1.html] for more details.
|
||||
|
||||
|
||||
==Known bugs and missing components==
|
||||
|
||||
Danish
|
||||
- the lexicon and chosen inflections are only partially verified
|
||||
|
||||
|
||||
English
|
||||
|
||||
|
||||
Finnish
|
||||
- wrong cases in some passive constructions
|
||||
|
||||
|
||||
French
|
||||
- multiple clitics (with V3) not always right
|
||||
- third person pronominal questions with inverted word order
|
||||
have wrong forms if "t" is required
|
||||
(e.g. "comment fera-t-il" becomes "comment fera il")
|
||||
|
||||
|
||||
German
|
||||
|
||||
|
||||
Italian
|
||||
- multiple clitics (with V3) not always right
|
||||
|
||||
|
||||
Norwegian
|
||||
- the lexicon and chosen inflections are only partially verified
|
||||
|
||||
|
||||
Russian
|
||||
- some functions missing
|
||||
- some regular paradigms are missing
|
||||
|
||||
|
||||
Spanish
|
||||
- multiple clitics (with V3) not always right
|
||||
- missing contractions with imperatives and clitics
|
||||
|
||||
|
||||
Swedish
|
||||
|
||||
|
||||
|
||||
|
||||
==More reading==
|
||||
|
||||
[Synopsis synopsis.html]. The concise guide to API v. 1.2.
|
||||
|
||||
[Grammars as Software Libraries gslt-sem-2006.html]. Slides
|
||||
with background and motivation for the resource grammar library.
|
||||
|
||||
[GF Resource Grammar Library Version 1.0 clt2006.html]. Slides
|
||||
giving an overview of the library and practical hints on its use.
|
||||
|
||||
[How to write resource grammars Resource-HOWTO.html]. Helps you
|
||||
start if you want to add another language to the library.
|
||||
|
||||
[Parametrized modules for Romance languages http://www.cs.chalmers.se/~aarne/geocal2006.pdf].
|
||||
Slides explaining some ideas in the implementation of
|
||||
French, Italian, and Spanish.
|
||||
|
||||
[Grammar writing by examples http://www.cs.chalmers.se/~aarne/slides/webalt-2005.pdf].
|
||||
Slides showing how linearization rules are written as strings parsable by the resource grammar.
|
||||
|
||||
[Multimodal Resource Grammars http://www.cs.chalmers.se/~aarne/slides/talk-edin2005.pdf].
|
||||
Slides showing how to use the multimodal resource library. N.B. the library
|
||||
examples are from ``multimodal/old``, which is a reduced-size API.
|
||||
|
||||
[GF Resource Grammar Library ../../../doc/resource.pdf] (pdf).
|
||||
Printable user manual with API documentation, for version 1.0.
|
||||
|
||||
581
doc/official.txt
581
doc/official.txt
@@ -1,581 +0,0 @@
|
||||
The Official EU languages
|
||||
|
||||
The 20 official languages of the EU and their abbreviations are as follows:
|
||||
|
||||
Español ES Spanish
|
||||
Dansk DA Danish
|
||||
Deutsch DE German
|
||||
Elinika EL Greek
|
||||
English EN
|
||||
Français FR French
|
||||
Italiano IT Italian
|
||||
Nederlands NL Dutch
|
||||
Português PT Portuguese
|
||||
Suomi FI Finnish
|
||||
Svenska SV Swedish
|
||||
Čeština CS Czech
|
||||
Eesti ET Estonian
|
||||
Latviesu valoda LV Latvian
|
||||
Lietuviu kalba LT Lithuanian
|
||||
Magyar HU Hungarian
|
||||
Malti MT Maltese
|
||||
Polski PL Polish
|
||||
Slovenčina SK Slovak
|
||||
Slovenščina SL Slovene
|
||||
|
||||
http://europa.eu.int/comm/education/policies/lang/languages/index_en.html
|
||||
|
||||
-----
|
||||
http://www.w3.org/WAI/ER/IG/ert/iso639.htm
|
||||
|
||||
ar arabic
|
||||
no norwegian
|
||||
ru russian
|
||||
|
||||
--
|
||||
|
||||
ISO 639: 3-letter codes
|
||||
|
||||
abk ab Abkhazian
|
||||
ace Achinese
|
||||
ach Acoli
|
||||
ada Adangme
|
||||
aar aa Afar
|
||||
afh Afrihili
|
||||
afr af Afrikaans
|
||||
afa Afro-Asiatic (Other)
|
||||
aka Akan
|
||||
akk Akkadian
|
||||
alb/sqi sq Albanian
|
||||
ale Aleut
|
||||
alg Algonquian languages
|
||||
tut Altaic (Other)
|
||||
amh am Amharic
|
||||
apa Apache languages
|
||||
ara ar Arabic
|
||||
arc Aramaic
|
||||
arp Arapaho
|
||||
arn Araucanian
|
||||
arw Arawak
|
||||
arm/hye hy Armenian
|
||||
art Artificial (Other)
|
||||
asm as Assamese
|
||||
ath Athapascan languages
|
||||
map Austronesian (Other)
|
||||
ava Avaric
|
||||
ave Avestan
|
||||
awa Awadhi
|
||||
aym ay Aymara
|
||||
aze az Azerbaijani
|
||||
nah Aztec
|
||||
ban Balinese
|
||||
bat Baltic (Other)
|
||||
bal Baluchi
|
||||
bam Bambara
|
||||
bai Bamileke languages
|
||||
bad Banda
|
||||
bnt Bantu (Other)
|
||||
bas Basa
|
||||
bak ba Bashkir
|
||||
baq/eus eu Basque
|
||||
bej Beja
|
||||
bem Bemba
|
||||
ben bn Bengali
|
||||
ber Berber (Other)
|
||||
bho Bhojpuri
|
||||
bih bh Bihari
|
||||
bik Bikol
|
||||
bin Bini
|
||||
bis bi Bislama
|
||||
bra Braj
|
||||
bre br Breton
|
||||
bug Buginese
|
||||
bul bg Bulgarian
|
||||
bua Buriat
|
||||
bur/mya my Burmese
|
||||
bel be Byelorussian
|
||||
cad Caddo
|
||||
car Carib
|
||||
cat ca Catalan
|
||||
cau Caucasian (Other)
|
||||
ceb Cebuano
|
||||
cel Celtic (Other)
|
||||
cai Central American Indian (Other)
|
||||
chg Chagatai
|
||||
cha Chamorro
|
||||
che Chechen
|
||||
chr Cherokee
|
||||
chy Cheyenne
|
||||
chb Chibcha
|
||||
chi/zho zh Chinese
|
||||
chn Chinook jargon
|
||||
cho Choctaw
|
||||
chu Church Slavic
|
||||
chv Chuvash
|
||||
cop Coptic
|
||||
cor Cornish
|
||||
cos co Corsican
|
||||
cre Cree
|
||||
mus Creek
|
||||
crp Creoles and Pidgins (Other)
|
||||
cpe Creoles and Pidgins, English-based (Other)
|
||||
cpf Creoles and Pidgins, French-based (Other)
|
||||
cpp Creoles and Pidgins, Portuguese-based (Other)
|
||||
cus Cushitic (Other)
|
||||
hr Croatian
|
||||
ces/cze cs Czech
|
||||
dak Dakota
|
||||
dan da Danish
|
||||
del Delaware
|
||||
din Dinka
|
||||
div Divehi
|
||||
doi Dogri
|
||||
dra Dravidian (Other)
|
||||
dua Duala
|
||||
dut/nla nl Dutch
|
||||
dum Dutch, Middle (ca. 1050-1350)
|
||||
dyu Dyula
|
||||
dzo dz Dzongkha
|
||||
efi Efik
|
||||
egy Egyptian (Ancient)
|
||||
eka Ekajuk
|
||||
elx Elamite
|
||||
eng en English
|
||||
enm English, Middle (ca. 1100-1500)
|
||||
ang English, Old (ca. 450-1100)
|
||||
esk Eskimo (Other)
|
||||
epo eo Esperanto
|
||||
est et Estonian
|
||||
ewe Ewe
|
||||
ewo Ewondo
|
||||
fan Fang
|
||||
fat Fanti
|
||||
fao fo Faroese
|
||||
fij fj Fijian
|
||||
fin fi Finnish
|
||||
fiu Finno-Ugrian (Other)
|
||||
fon Fon
|
||||
fra/fre fr French
|
||||
frm French, Middle (ca. 1400-1600)
|
||||
fro French, Old (842- ca. 1400)
|
||||
fry fy Frisian
|
||||
ful Fulah
|
||||
gaa Ga
|
||||
gae/gdh Gaelic (Scots)
|
||||
glg gl Gallegan
|
||||
lug Ganda
|
||||
gay Gayo
|
||||
gez Geez
|
||||
geo/kat ka Georgian
|
||||
deu/ger de German
|
||||
gmh German, Middle High (ca. 1050-1500)
|
||||
goh German, Old High (ca. 750-1050)
|
||||
gem Germanic (Other)
|
||||
gil Gilbertese
|
||||
gon Gondi
|
||||
got Gothic
|
||||
grb Grebo
|
||||
grc Greek, Ancient (to 1453)
|
||||
ell/gre el Greek, Modern (1453-)
|
||||
kal kl Greenlandic
|
||||
grn gn Guarani
|
||||
guj gu Gujarati
|
||||
hai Haida
|
||||
hau ha Hausa
|
||||
haw Hawaiian
|
||||
heb he Hebrew
|
||||
her Herero
|
||||
hil Hiligaynon
|
||||
him Himachali
|
||||
hin hi Hindi
|
||||
hmo Hiri Motu
|
||||
hun hu Hungarian
|
||||
hup Hupa
|
||||
iba Iban
|
||||
ice/isl is Icelandic
|
||||
ibo Igbo
|
||||
ijo Ijo
|
||||
ilo Iloko
|
||||
inc Indic (Other)
|
||||
ine Indo-European (Other)
|
||||
ind id Indonesian
|
||||
ina ia Interlingua (International Auxiliary language Association)
|
||||
ine - Interlingue
|
||||
iku iu Inuktitut
|
||||
ipk ik Inupiak
|
||||
ira Iranian (Other)
|
||||
gai/iri ga Irish
|
||||
sga Irish, Old (to 900)
|
||||
mga Irish, Middle (900 - 1200)
|
||||
iro Iroquoian languages
|
||||
ita it Italian
|
||||
jpn ja Japanese
|
||||
jav/jaw jv/jw Javanese
|
||||
jrb Judeo-Arabic
|
||||
jpr Judeo-Persian
|
||||
kab Kabyle
|
||||
kac Kachin
|
||||
kam Kamba
|
||||
kan kn Kannada
|
||||
kau Kanuri
|
||||
kaa Kara-Kalpak
|
||||
kar Karen
|
||||
kas ks Kashmiri
|
||||
kaw Kawi
|
||||
kaz kk Kazakh
|
||||
kha Khasi
|
||||
khm km Khmer
|
||||
khi Khoisan (Other)
|
||||
kho Khotanese
|
||||
kik Kikuyu
|
||||
kin rw Kinyarwanda
|
||||
kir ky Kirghiz
|
||||
kom Komi
|
||||
kon Kongo
|
||||
kok Konkani
|
||||
kor ko Korean
|
||||
kpe Kpelle
|
||||
kro Kru
|
||||
kua Kuanyama
|
||||
kum Kumyk
|
||||
kur ku Kurdish
|
||||
kru Kurukh
|
||||
kus Kusaie
|
||||
kut Kutenai
|
||||
lad Ladino
|
||||
lah Lahnda
|
||||
lam Lamba
|
||||
oci oc Langue d'Oc (post 1500)
|
||||
lao lo Lao
|
||||
lat la Latin
|
||||
lav lv Latvian
|
||||
ltz Letzeburgesch
|
||||
lez Lezghian
|
||||
lin ln Lingala
|
||||
lit lt Lithuanian
|
||||
loz Lozi
|
||||
lub Luba-Katanga
|
||||
lui Luiseno
|
||||
lun Lunda
|
||||
luo Luo (Kenya and Tanzania)
|
||||
mac/mak mk Macedonian
|
||||
mad Madurese
|
||||
mag Magahi
|
||||
mai Maithili
|
||||
mak Makasar
|
||||
mlg mg Malagasy
|
||||
may/msa ms Malay
|
||||
mal ml Malayalam
|
||||
mlt mt Maltese
|
||||
man Mandingo
|
||||
mni Manipuri
|
||||
mno Manobo languages
|
||||
max Manx
|
||||
mao/mri mi Maori
|
||||
mar mr Marathi
|
||||
chm Mari
|
||||
mah Marshall
|
||||
mwr Marwari
|
||||
mas Masai
|
||||
myn Mayan languages
|
||||
men Mende
|
||||
mic Micmac
|
||||
min Minangkabau
|
||||
mis Miscellaneous (Other)
|
||||
moh Mohawk
|
||||
mol mo Moldavian
|
||||
mkh Mon-Khmer (Other)
|
||||
lol Mongo
|
||||
mon mn Mongolian
|
||||
mos Mossi
|
||||
mul Multiple languages
|
||||
mun Munda languages
|
||||
nau na Nauru
|
||||
nav Navajo
|
||||
nde Ndebele, North
|
||||
nbl Ndebele, South
|
||||
ndo Ndongo
|
||||
nep ne Nepali
|
||||
new Newari
|
||||
nic Niger-Kordofanian (Other)
|
||||
ssa Nilo-Saharan (Other)
|
||||
niu Niuean
|
||||
non Norse, Old
|
||||
nai North American Indian (Other)
|
||||
nor no Norwegian
|
||||
nno Norwegian (Nynorsk)
|
||||
nub Nubian languages
|
||||
nym Nyamwezi
|
||||
nya Nyanja
|
||||
nyn Nyankole
|
||||
nyo Nyoro
|
||||
nzi Nzima
|
||||
oji Ojibwa
|
||||
ori or Oriya
|
||||
orm om Oromo
|
||||
osa Osage
|
||||
oss Ossetic
|
||||
oto Otomian languages
|
||||
pal Pahlavi
|
||||
pau Palauan
|
||||
pli Pali
|
||||
pam Pampanga
|
||||
pag Pangasinan
|
||||
pan pa Panjabi
|
||||
pap Papiamento
|
||||
paa Papuan-Australian (Other)
|
||||
fas/per fa Persian
|
||||
peo Persian, Old (ca 600 - 400 B.C.)
|
||||
phn Phoenician
|
||||
pol pl Polish
|
||||
pon Ponape
|
||||
por pt Portuguese
|
||||
pra Prakrit languages
|
||||
pro Provencal, Old (to 1500)
|
||||
pus ps Pushto
|
||||
que qu Quechua
|
||||
roh rm Rhaeto-Romance
|
||||
raj Rajasthani
|
||||
rar Rarotongan
|
||||
roa Romance (Other)
|
||||
ron/rum ro Romanian
|
||||
rom Romany
|
||||
run rn Rundi
|
||||
rus ru Russian
|
||||
sal Salishan languages
|
||||
sam Samaritan Aramaic
|
||||
smi Sami languages
|
||||
smo sm Samoan
|
||||
sad Sandawe
|
||||
sag sg Sango
|
||||
san sa Sanskrit
|
||||
srd Sardinian
|
||||
sco Scots
|
||||
sel Selkup
|
||||
sem Semitic (Other)
|
||||
sr Serbian
|
||||
scr sh Serbo-Croatian
|
||||
srr Serer
|
||||
shn Shan
|
||||
sna sn Shona
|
||||
sid Sidamo
|
||||
bla Siksika
|
||||
snd sd Sindhi
|
||||
sin si Singhalese
|
||||
sit - Sino-Tibetan (Other)
|
||||
sio Siouan languages
|
||||
sla Slavic (Other)
|
||||
ssw ss Siswant
|
||||
slk/slo sk Slovak
|
||||
slv sl Slovenian
|
||||
sog Sogdian
|
||||
som so Somali
|
||||
son Songhai
|
||||
wen Sorbian languages
|
||||
nso Sotho, Northern
|
||||
sot st Sotho, Southern
|
||||
sai South American Indian (Other)
|
||||
esl/spa es Spanish
|
||||
suk Sukuma
|
||||
sux Sumerian
|
||||
sun su Sudanese
|
||||
sus Susu
|
||||
swa sw Swahili
|
||||
ssw Swazi
|
||||
sve/swe sv Swedish
|
||||
syr Syriac
|
||||
tgl tl Tagalog
|
||||
tah Tahitian
|
||||
tgk tg Tajik
|
||||
tmh Tamashek
|
||||
tam ta Tamil
|
||||
tat tt Tatar
|
||||
tel te Telugu
|
||||
ter Tereno
|
||||
tha th Thai
|
||||
bod/tib bo Tibetan
|
||||
tig Tigre
|
||||
tir ti Tigrinya
|
||||
tem Timne
|
||||
tiv Tivi
|
||||
tli Tlingit
|
||||
tog to Tonga (Nyasa)
|
||||
ton Tonga (Tonga Islands)
|
||||
tru Truk
|
||||
tsi Tsimshian
|
||||
tso ts Tsonga
|
||||
tsn tn Tswana
|
||||
tum Tumbuka
|
||||
tur tr Turkish
|
||||
ota Turkish, Ottoman (1500 - 1928)
|
||||
tuk tk Turkmen
|
||||
tyv Tuvinian
|
||||
twi tw Twi
|
||||
uga Ugaritic
|
||||
uig ug Uighur
|
||||
ukr uk Ukrainian
|
||||
umb Umbundu
|
||||
und Undetermined
|
||||
urd ur Urdu
|
||||
uzb uz Uzbek
|
||||
vai Vai
|
||||
ven Venda
|
||||
vie vi Vietnamese
|
||||
vol vo Volapük
|
||||
vot Votic
|
||||
wak Wakashan languages
|
||||
wal Walamo
|
||||
war Waray
|
||||
was Washo
|
||||
cym/wel cy Welsh
|
||||
wol wo Wolof
|
||||
xho xh Xhosa
|
||||
sah Yakut
|
||||
yao Yao
|
||||
yap Yap
|
||||
yid yi Yiddish
|
||||
yor yo Yoruba
|
||||
zap Zapotec
|
||||
zen Zenaga
|
||||
zha za Zhuang
|
||||
zul zu Zulu
|
||||
zun Zuni
|
||||
|
||||
ISO 639: 2-letter codes
|
||||
|
||||
AA "Afar"
|
||||
AB "Abkhazian"
|
||||
AF "Afrikaans"
|
||||
AM "Amharic"
|
||||
AR "Arabic"
|
||||
AS "Assamese"
|
||||
AY "Aymara"
|
||||
AZ "Azerbaijani"
|
||||
BA "Bashkir"
|
||||
BE "Byelorussian"
|
||||
BG "Bulgarian"
|
||||
BH "Bihari"
|
||||
BI "Bislama"
|
||||
BN "Bengali" "Bangla"
|
||||
BO "Tibetan"
|
||||
BR "Breton"
|
||||
CA "Catalan"
|
||||
CO "Corsican"
|
||||
CS "Czech"
|
||||
CY "Welsh"
|
||||
DA "Danish"
|
||||
DE "German"
|
||||
DZ "Bhutani"
|
||||
EL "Greek"
|
||||
EN "English" "American"
|
||||
EO "Esperanto"
|
||||
ES "Spanish"
|
||||
ET "Estonian"
|
||||
EU "Basque"
|
||||
FA "Persian"
|
||||
FI "Finnish"
|
||||
FJ "Fiji"
|
||||
FO "Faeroese"
|
||||
FR "French"
|
||||
FY "Frisian"
|
||||
GA "Irish"
|
||||
GD "Gaelic" "Scots Gaelic"
|
||||
GL "Galician"
|
||||
GN "Guarani"
|
||||
GU "Gujarati"
|
||||
HA "Hausa"
|
||||
HI "Hindi"
|
||||
HR "Croatian"
|
||||
HU "Hungarian"
|
||||
HY "Armenian"
|
||||
IA "Interlingua"
|
||||
IE "Interlingue"
|
||||
IK "Inupiak"
|
||||
IN "Indonesian"
|
||||
IS "Icelandic"
|
||||
IT "Italian"
|
||||
IW "Hebrew"
|
||||
JA "Japanese"
|
||||
JI "Yiddish"
|
||||
JW "Javanese"
|
||||
KA "Georgian"
|
||||
KK "Kazakh"
|
||||
KL "Greenlandic"
|
||||
KM "Cambodian"
|
||||
KN "Kannada"
|
||||
KO "Korean"
|
||||
KS "Kashmiri"
|
||||
KU "Kurdish"
|
||||
KY "Kirghiz"
|
||||
LA "Latin"
|
||||
LN "Lingala"
|
||||
LO "Laothian"
|
||||
LT "Lithuanian"
|
||||
LV "Latvian" "Lettish"
|
||||
MG "Malagasy"
|
||||
MI "Maori"
|
||||
MK "Macedonian"
|
||||
ML "Malayalam"
|
||||
MN "Mongolian"
|
||||
MO "Moldavian"
|
||||
MR "Marathi"
|
||||
MS "Malay"
|
||||
MT "Maltese"
|
||||
MY "Burmese"
|
||||
NA "Nauru"
|
||||
NE "Nepali"
|
||||
NL "Dutch"
|
||||
NO "Norwegian"
|
||||
OC "Occitan"
|
||||
OM "Oromo" "Afan"
|
||||
OR "Oriya"
|
||||
PA "Punjabi"
|
||||
PL "Polish"
|
||||
PS "Pashto" "Pushto"
|
||||
PT "Portuguese"
|
||||
QU "Quechua"
|
||||
RM "Rhaeto-Romance"
|
||||
RN "Kirundi"
|
||||
RO "Romanian"
|
||||
RU "Russian"
|
||||
RW "Kinyarwanda"
|
||||
SA "Sanskrit"
|
||||
SD "Sindhi"
|
||||
SG "Sangro"
|
||||
SH "Serbo-Croatian"
|
||||
SI "Singhalese"
|
||||
SK "Slovak"
|
||||
SL "Slovenian"
|
||||
SM "Samoan"
|
||||
SN "Shona"
|
||||
SO "Somali"
|
||||
SQ "Albanian"
|
||||
SR "Serbian"
|
||||
SS "Siswati"
|
||||
ST "Sesotho"
|
||||
SU "Sudanese"
|
||||
SV "Swedish"
|
||||
SW "Swahili"
|
||||
TA "Tamil"
|
||||
TE "Telugu"
|
||||
TG "Tajik"
|
||||
TH "Thai"
|
||||
TI "Tigrinya"
|
||||
TK "Turkmen"
|
||||
TL "Tagalog"
|
||||
TN "Setswana"
|
||||
TO "Tonga"
|
||||
TR "Turkish"
|
||||
TS "Tsonga"
|
||||
TT "Tatar"
|
||||
TW "Twi"
|
||||
UK "Ukrainian"
|
||||
UR "Urdu"
|
||||
UZ "Uzbek"
|
||||
VI "Vietnamese"
|
||||
VO "Volapuk"
|
||||
WO "Wolof"
|
||||
XH "Xhosa"
|
||||
YO "Yoruba"
|
||||
ZH "Chinese"
|
||||
ZU "Zulu"
|
||||
@@ -1,48 +0,0 @@
|
||||
Morphological Paradigms in the GF Resource Grammar Library
|
||||
Aarne Ranta
|
||||
|
||||
|
||||
This is a synopsis of the main morphological paradigms for
|
||||
nouns (``N``), adjectives (``A``), and verbs (``V``).
|
||||
|
||||
|
||||
=English=
|
||||
|
||||
```
|
||||
mkN : (flash : Str) -> N ; -- car, bus, ax, hero, fly, boy
|
||||
mkN : (man,men : Str) -> N ; -- index, indices
|
||||
mkN : (man,men,man's,men's : Str) -> N ;
|
||||
mkN : Str -> N -> N ; -- baby boom
|
||||
|
||||
mkA : (happy : Str) -> A ; -- small, happy, free
|
||||
mkA : (fat,fatter : Str) -> A ;
|
||||
mkA : (good,better,best,well : Str) -> A
|
||||
compoundA : A -> A ; -- -/more/most ridiculous
|
||||
|
||||
mkV : (cry : Str) -> V ; -- call, kiss, echo, cry, pray
|
||||
mkV : (stop,stopped : Str) -> V ;
|
||||
mkV : (drink,drank,drunk : Str) -> V ;
|
||||
mkV : (run,ran,run,running : Str) -> V ;
|
||||
mkV : (go,goes,went,gone,going : Str) -> V
|
||||
```
|
||||
|
||||
=French=
|
||||
|
||||
```
|
||||
mkN : (cheval : Str) -> N ; -- pas, prix, nez, bijou, cheval
|
||||
mkN : (foie : Str) -> Gender -> N ;
|
||||
mkN : (oeil,yeux : Str) -> Gender -> N ;
|
||||
mkN : N -> Str -> N
|
||||
|
||||
mkA : (cher : Str) -> A ; -- banal, heureux, italien, jeune, amer, carré, joli
|
||||
mkA : (sec,seche : Str) -> A ;
|
||||
mkA : (banal,banale,banaux,banalement : Str) -> A ;
|
||||
mkA : (bon : A) -> (meilleur : A) -> A
|
||||
prefixA : A -> A ;
|
||||
|
||||
mkV : (finir : Str) -> V ; -- aimer, céder, placer, manger, payer, finir
|
||||
mkV : (jeter,jette,jettera : Str) -> V ;
|
||||
mkV : V2 -> V
|
||||
etreV : V -> V ;
|
||||
reflV : V -> V ;
|
||||
```
|
||||
@@ -1,529 +0,0 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
||||
<HTML>
|
||||
<HEAD>
|
||||
<META NAME="generator" CONTENT="http://txt2tags.org">
|
||||
<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=utf8">
|
||||
<LINK REL="stylesheet" TYPE="text/css" HREF="../../css/style.css">
|
||||
<TITLE>GF Resource Grammar Library Documentation and Publications</TITLE>
|
||||
</HEAD><BODY BGCOLOR="white" TEXT="black">
|
||||
<CENTER>
|
||||
<H1>GF Resource Grammar Library Documentation and Publications</H1>
|
||||
<FONT SIZE="4"><I>Aarne Ranta</I></FONT><BR>
|
||||
<FONT SIZE="4">20170119</FONT>
|
||||
</CENTER>
|
||||
|
||||
<P>
|
||||
<I>To be completed. Contributions welcome - in particular, links to open access publications!</I>
|
||||
</P>
|
||||
|
||||
<H3>Afrikaans</H3>
|
||||
|
||||
<UL>
|
||||
<LI><B>Source</B>: <A HREF="http://www.grammaticalframework.org/lib/src/afrikaans">http://www.grammaticalframework.org/lib/src/afrikaans</A> (Laurette Pretorius, Laurette Marais)
|
||||
</UL>
|
||||
|
||||
<H3>Amharic</H3>
|
||||
|
||||
<UL>
|
||||
<LI><B>Source</B>: <A HREF="http://www.grammaticalframework.org/lib/src/amharic">http://www.grammaticalframework.org/lib/src/amharic</A> (Markos Kassa Gobena)
|
||||
<P></P>
|
||||
<LI><B>Publications</B>
|
||||
<P></P>
|
||||
Markos Kassa Gobena.
|
||||
<I>Implementing an Open Source Amharic Resource Grammar in GF</I>,
|
||||
MSc thesis, Chalmers University, 2010.
|
||||
<A HREF="http://publications.lib.chalmers.se/records/fulltext/146295.pdf">http://publications.lib.chalmers.se/records/fulltext/146295.pdf</A>
|
||||
</UL>
|
||||
|
||||
<H3>Arabic</H3>
|
||||
|
||||
<UL>
|
||||
<LI><B>Source</B>: <A HREF="http://www.grammaticalframework.org/lib/src/arabic">http://www.grammaticalframework.org/lib/src/arabic</A> (Ali El Dada)
|
||||
<P></P>
|
||||
<LI><B>Publications</B>
|
||||
<P></P>
|
||||
Ali El Dada.
|
||||
<I>Arabic Resource Grammar in GF</I>, MSc Thesis, Chalmers University, 2006.
|
||||
<P></P>
|
||||
A. El Dada and A. Ranta.
|
||||
Implementing an Open Source Arabic Resource Grammar in GF.
|
||||
In M. Mughazy (ed),
|
||||
<I>Perspectives on Arabic Linguistics XX. Papers from the Twentieth Annual Symposium on Arabic Linguistics, Kalamazoo, March 26</I>
|
||||
John Benjamins Publishing Company.
|
||||
2007.
|
||||
<br>
|
||||
<I>An outline of the Arabic resource grammar project, focusing on linguistic aspects.</I>
|
||||
<P></P>
|
||||
A. El Dada.
|
||||
Implementation of the Arabic Numerals and their Syntax in GF.
|
||||
Computational Approaches to Semitic Languages: Common Issues and Resources,
|
||||
ACL-2007 Workshop,
|
||||
June 28, 2007, Prague.
|
||||
2007.
|
||||
<A HREF="http://acl.ldc.upenn.edu/W/W07/W07-08.pdf">http://acl.ldc.upenn.edu/W/W07/W07-08.pdf</A>
|
||||
<br>
|
||||
<I>A case study with the resource grammar, focusing on the morphosyntax</I>
|
||||
<I>and agreement of constructions with numerals.</I>
|
||||
</UL>
|
||||
|
||||
<H3>Bulgarian</H3>
|
||||
|
||||
<UL>
|
||||
<LI><B>Source</B>: <A HREF="http://www.grammaticalframework.org/lib/src/bulgarian">http://www.grammaticalframework.org/lib/src/bulgarian</A> (Krasimir Angelov)
|
||||
<P></P>
|
||||
<LI><B>Publications</B>
|
||||
<P></P>
|
||||
K. Angelov.
|
||||
Type-Theoretical Bulgarian Grammar.
|
||||
In B. Nordström and A. Ranta (eds),
|
||||
<I>Advances in Natural Language Processing (GoTAL 2008)</I>,
|
||||
LNCS/LNAI 5221, Springer,
|
||||
2008.
|
||||
<A HREF="http://link.springer.com/chapter/10.1007%2F978-3-540-85287-2_6">http://link.springer.com/chapter/10.1007%2F978-3-540-85287-2_6</A>
|
||||
<br>
|
||||
<I>Explains the implementation of a Bulgarian resource grammar in GF.</I>
|
||||
</UL>
|
||||
|
||||
<H3>Catalan</H3>
|
||||
|
||||
<UL>
|
||||
<LI><B>Source</B>: <A HREF="http://www.grammaticalframework.org/lib/src/catalan">http://www.grammaticalframework.org/lib/src/catalan</A> <A HREF="http://www.grammaticalframework.org/lib/src/romance">http://www.grammaticalframework.org/lib/src/romance</A> (Jordi Saludes, Inari Listenmaa)
|
||||
</UL>
|
||||
|
||||
<H3>Chinese</H3>
|
||||
|
||||
<UL>
|
||||
<LI><B>Source</B>: <A HREF="http://www.grammaticalframework.org/lib/src/chinese">http://www.grammaticalframework.org/lib/src/chinese</A> (Aarne Ranta, Zhuo Lin Qiqige, Chen Peng, Qiao Haiyan)
|
||||
<P></P>
|
||||
<LI><B>Publications</B>
|
||||
<P></P>
|
||||
Chen Peng,
|
||||
Implementation of a Chinese Resource Grammar in Grammatical Framework.
|
||||
<I>International Journal of Knowledge and Language Processing</I>,
|
||||
4(1),
|
||||
2013,
|
||||
pp. 26-34.
|
||||
<A HREF="http://www.ijklp.org/archives/vol4no1/Implementation%20of%20Chinese%20Resource%20Grammar%20in%20Grammatical%20Framework.pdf">http://www.ijklp.org/archives/vol4no1/Implementation%20of%20Chinese%20Resource%20Grammar%20in%20Grammatical%20Framework.pdf</A>
|
||||
<P></P>
|
||||
Aarne Ranta.
|
||||
Grammatical Framework and Chinese.
|
||||
Appendix to the GF book (A. Ranta, <I>Grammatical Framework</I>, CSLI 2011),
|
||||
2012.
|
||||
<A HREF="http://www.grammaticalframework.org/gf-book/gf-chinese-appendix.pdf">http://www.grammaticalframework.org/gf-book/gf-chinese-appendix.pdf</A>
|
||||
</UL>
|
||||
|
||||
<H3>Danish</H3>
|
||||
|
||||
<UL>
|
||||
<LI><B>Source</B>: <A HREF="http://www.grammaticalframework.org/lib/src/danish">http://www.grammaticalframework.org/lib/src/danish</A> <A HREF="http://www.grammaticalframework.org/lib/src/scandinavian">http://www.grammaticalframework.org/lib/src/scandinavian</A> (Aarne Ranta)
|
||||
</UL>
|
||||
|
||||
<H3>Dutch</H3>
|
||||
|
||||
<UL>
|
||||
<LI><B>Source</B>: <A HREF="http://www.grammaticalframework.org/lib/src/dutch">http://www.grammaticalframework.org/lib/src/dutch</A> (Aarne Ranta, Femke Johansson)
|
||||
</UL>
|
||||
|
||||
<H3>English</H3>
|
||||
|
||||
<UL>
|
||||
<LI><B>Source</B>: <A HREF="http://www.grammaticalframework.org/lib/src/english">http://www.grammaticalframework.org/lib/src/english</A> (Aarne Ranta, Björn Bringert, Krasimir Angelov)
|
||||
<P></P>
|
||||
<LI><B>Publications</B>
|
||||
<P></P>
|
||||
A. Ranta.
|
||||
The GF Resource Grammar Library.
|
||||
<I>Linguistic Issues in Language Technology</I>,
|
||||
2 (2),
|
||||
2009.
|
||||
<A HREF="http://elanguage.net/journals/index.php/lilt/article/viewFile/214/158">PDF</A>
|
||||
<br>
|
||||
<I>A systematic presentation of the library from the linguistic point of view.</I>
|
||||
<I>Not only about English, but English examples abound.</I>
|
||||
</UL>
|
||||
|
||||
<H3>Estonian</H3>
|
||||
|
||||
<UL>
|
||||
<LI><B>Source</B>: <A HREF="http://www.grammaticalframework.org/lib/src/estonian">http://www.grammaticalframework.org/lib/src/estonian</A> (Kaarel Kaljurand, Inari Listenmaa)
|
||||
</UL>
|
||||
|
||||
<H3>Finnish</H3>
|
||||
|
||||
<UL>
|
||||
<LI><B>Source</B>: <A HREF="http://www.grammaticalframework.org/lib/src/finnish">http://www.grammaticalframework.org/lib/src/finnish</A> (Aarne Ranta, Inari Listenmaa)
|
||||
<P></P>
|
||||
<LI><B>Publications</B>
|
||||
<P></P>
|
||||
A. Ranta.
|
||||
On the Syntax and Translation of Finnish Discourse Clitics.
|
||||
In D. Santos, K. Lindén and W. Ng'ang'a (eds),
|
||||
<I>Shall We Play the Festschrift Game?</I>
|
||||
<I>Essays on the Occasion of Lauri Carlson's 60th Birthday</I>.
|
||||
Springer, Heidelberg, 2012.
|
||||
pp. 227-241.
|
||||
<A HREF="http://link.springer.com/chapter/10.1007/978-3-642-30773-7_14">http://link.springer.com/chapter/10.1007/978-3-642-30773-7_14</A>
|
||||
draft version <A HREF="http://www.cse.chalmers.se/~aarne/articles/discourse-clitics.pdf">http://www.cse.chalmers.se/~aarne/articles/discourse-clitics.pdf</A>
|
||||
<P></P>
|
||||
A. Ranta.
|
||||
How predictable is Finnish morphology? An experiment on lexicon construction.
|
||||
In J. Nivre, M. Dahllöf and B. Megyesi (eds),
|
||||
<I>Resourceful Language Technology: Festschrift in Honor of Anna Sågvall Hein</I>,
|
||||
University of Uppsala,
|
||||
2008.
|
||||
<A HREF="http://publications.uu.se/abstract.xsql?dbid=8933">http://publications.uu.se/abstract.xsql?dbid=8933</A>
|
||||
<br>
|
||||
<I>Presents an experiment on smart paradigms in Finnish.</I>
|
||||
</UL>
|
||||
|
||||
<H3>French</H3>
|
||||
|
||||
<UL>
|
||||
<LI><B>Source</B>: <A HREF="http://www.grammaticalframework.org/lib/src/french">http://www.grammaticalframework.org/lib/src/french</A> <A HREF="http://www.grammaticalframework.org/lib/src/romance">http://www.grammaticalframework.org/lib/src/romance</A> (Aarne Ranta, Ramona Enache)
|
||||
</UL>
|
||||
|
||||
<H3>German</H3>
|
||||
|
||||
<UL>
|
||||
<LI><B>Source</B>: <A HREF="http://www.grammaticalframework.org/lib/src/german">http://www.grammaticalframework.org/lib/src/german</A> (Aarne Ranta, Harald Hammarström, Erzsébet Galgóczy)
|
||||
</UL>
|
||||
|
||||
<H3>Greek</H3>
|
||||
|
||||
<UL>
|
||||
<LI><B>Source</B>: <A HREF="http://www.grammaticalframework.org/lib/src/greek">http://www.grammaticalframework.org/lib/src/greek</A> (Ioanna Papadopoulou)
|
||||
<P></P>
|
||||
<LI><B>Publications</B>
|
||||
<P></P>
|
||||
Ioanna Papadopoulou.
|
||||
GF Modern Greek Resource Grammar,
|
||||
MA thesis, University of Gothenburg,
|
||||
2013.
|
||||
<P></P>
|
||||
Ioanna Papadopoulou.
|
||||
GF Modern Greek Resource Grammar.
|
||||
RANLP 2013.
|
||||
</UL>
|
||||
|
||||
<H3>Hebrew</H3>
|
||||
|
||||
<UL>
|
||||
<LI><B>Source</B>: <A HREF="http://www.grammaticalframework.org/lib/src/hebrew">http://www.grammaticalframework.org/lib/src/hebrew</A> (Dana Dannélls)
|
||||
<P></P>
|
||||
<LI><B>Publications</B>
|
||||
<P></P>
|
||||
D. Dannélls and J. Camilleri.
|
||||
Verb Morphology of Hebrew and Maltese - Towards an Open Source Type Theoretical Resource Grammar in GF.
|
||||
<I>Proceedings of the Language Resources (LRs) and Human Language Technologies (HLT) for Semitic Languages Status, Updates, and Prospects, LREC-2010 Workshop</I>,
|
||||
Malta, pp. 57-61.
|
||||
2010.
|
||||
<A HREF="http://spraakdata.gu.se/svedd/pub/lrec10.pdf">http://spraakdata.gu.se/svedd/pub/lrec10.pdf</A>
|
||||
</UL>
|
||||
|
||||
<H3>Hindi</H3>
|
||||
|
||||
<UL>
|
||||
<LI><B>Source</B>: <A HREF="http://www.grammaticalframework.org/lib/src/hindi">http://www.grammaticalframework.org/lib/src/hindi</A> <A HREF="http://www.grammaticalframework.org/lib/src/hindustani">http://www.grammaticalframework.org/lib/src/hindustani</A> (Shafqat Virk, K.V.S. Prasad, Muhammad Humayoun, Aarne Ranta)
|
||||
<P></P>
|
||||
<LI><B>Publications</B>
|
||||
<P></P>
|
||||
Shafqat Virk.
|
||||
<I>Computational Linguistics Resources for Indo-Iranian Languages</I>,
|
||||
PhD Thesis, University of Gothenburg, 2013.
|
||||
<A HREF="http://www.cse.chalmers.se/~virk/shafqat-phd-thesis.pdf">http://www.cse.chalmers.se/~virk/shafqat-phd-thesis.pdf</A>
|
||||
<P></P>
|
||||
K.V.S. Prasad and Shafqat Virk.
|
||||
Computational evidence that
|
||||
Hindi and Urdu share a grammar but not the lexicon.
|
||||
In The 3rd Workshop
|
||||
on South and Southeast Asian NLP, COLING 2012. <I>Reprinted in Shafqat's thesis</I>
|
||||
</UL>
|
||||
|
||||
<H3>Icelandic</H3>
|
||||
|
||||
<UL>
|
||||
<LI><B>Source</B>: <A HREF="http://www.grammaticalframework.org/lib/src/icelandic">http://www.grammaticalframework.org/lib/src/icelandic</A> (Bjarki Traustason)
|
||||
<P></P>
|
||||
<LI><B>Publications</B>
|
||||
<P></P>
|
||||
Bjarki Traustason, MSc thesis, Chalmers
|
||||
</UL>
|
||||
|
||||
<H3>Interlingua</H3>
|
||||
|
||||
<UL>
|
||||
<LI><B>Source</B>: <A HREF="http://www.grammaticalframework.org/lib/src/interlingua">http://www.grammaticalframework.org/lib/src/interlingua</A> (Jean-Philippe Bernardy)
|
||||
</UL>
|
||||
|
||||
<H3>Italian</H3>
|
||||
|
||||
<UL>
|
||||
<LI><B>Source</B>: <A HREF="http://www.grammaticalframework.org/lib/src/italian">http://www.grammaticalframework.org/lib/src/italian</A> <A HREF="http://www.grammaticalframework.org/lib/src/romance">http://www.grammaticalframework.org/lib/src/romance</A> (Aarne Ranta, Ramona Enache, Gabriele Paganelli)
|
||||
</UL>
|
||||
|
||||
<H3>Japanese</H3>
|
||||
|
||||
<UL>
|
||||
<LI><B>Source</B>: <A HREF="http://www.grammaticalframework.org/lib/src/japanese">http://www.grammaticalframework.org/lib/src/japanese</A> (Liza Zimina)
|
||||
<P></P>
|
||||
<LI><B>Publications</B>
|
||||
<P></P>
|
||||
Elizaveta Zimina.
|
||||
GF Japanese Resource Grammar,
|
||||
MA Thesis, University of Gothenburg,
|
||||
2012.
|
||||
<A HREF="http://www.ling.gu.se/~lager/MLT/zimina_thesis_draft.pdf">http://www.ling.gu.se/~lager/MLT/zimina_thesis_draft.pdf</A>
|
||||
<P></P>
|
||||
Elizaveta Zimina.
|
||||
Fitting a Round Peg in a Square Hole: Japanese Resource Grammar in GF.
|
||||
<I>Advances in Natural Language Processing (JapTAL-2012)</I>
|
||||
Lecture Notes in Computer Science Volume 7614, 2012, pp 156-167.
|
||||
<A HREF="http://link.springer.com/chapter/10.1007%2F978-3-642-33983-7_16">http://link.springer.com/chapter/10.1007%2F978-3-642-33983-7_16</A>
|
||||
</UL>
|
||||
|
||||
<H3>Latin</H3>
|
||||
|
||||
<UL>
|
||||
<LI><B>Source</B>: <A HREF="http://www.grammaticalframework.org/lib/src/latin">http://www.grammaticalframework.org/lib/src/latin</A> (Aarne Ranta)
|
||||
</UL>
|
||||
|
||||
<H3>Latvian</H3>
|
||||
|
||||
<UL>
|
||||
<LI><B>Source</B>: <A HREF="http://www.grammaticalframework.org/lib/src/latvian">http://www.grammaticalframework.org/lib/src/latvian</A> (Normunds Gruzitis, Peter Paikens)
|
||||
<P></P>
|
||||
<LI><B>Publications</B>
|
||||
P. Paikens and N. Gruzitis.
|
||||
An implementation of a Latvian resource grammar in Grammatical Framework,
|
||||
LREC 2012, pp. 1680-1685.
|
||||
<A HREF="http://lrec.elra.info/proceedings/lrec2012/pdf/976_Paper.pdf">http://lrec.elra.info/proceedings/lrec2012/pdf/976_Paper.pdf</A>
|
||||
</UL>
|
||||
|
||||
<H3>Maltese</H3>
|
||||
|
||||
<UL>
|
||||
<LI><B>Source</B>: <A HREF="http://www.grammaticalframework.org/lib/src/maltese">http://www.grammaticalframework.org/lib/src/maltese</A> (John J. Camilleri)
|
||||
<P></P>
|
||||
<LI><B>Publications</B>
|
||||
<P></P>
|
||||
John J. Camilleri,
|
||||
MSc thesis, Chalmers University of Technology, 2013.
|
||||
<P></P>
|
||||
John J. Camilleri.
|
||||
A computational grammar for Maltese: Talk at the 4th International Conference on Maltese Linguistics (GĦILM 2013). Lyon, France, 2013.
|
||||
<A HREF="http://academic.johnjcamilleri.com/presentations/2013-06%20G%C4%A6ILM%201.pdf">http://academic.johnjcamilleri.com/presentations/2013-06%20G%C4%A6ILM%201.pdf</A>
|
||||
<P></P>
|
||||
D. Dannélls and J. Camilleri.
|
||||
Verb Morphology of Hebrew and Maltese - Towards an Open Source Type Theoretical Resource Grammar in GF.
|
||||
<I>Proceedings of the Language Resources (LRs) and Human Language Technologies (HLT) for Semitic Languages Status, Updates, and Prospects, LREC-2010 Workshop</I>,
|
||||
Malta, pp. 57-61.
|
||||
2010.
|
||||
<A HREF="http://spraakdata.gu.se/svedd/pub/lrec10.pdf">http://spraakdata.gu.se/svedd/pub/lrec10.pdf</A>
|
||||
</UL>
|
||||
|
||||
<H3>Nepali</H3>
|
||||
|
||||
<UL>
|
||||
<LI><B>Source</B>: <A HREF="http://www.grammaticalframework.org/lib/src/nepali">http://www.grammaticalframework.org/lib/src/nepali</A> (Dinesh Simk)
|
||||
<P></P>
|
||||
<LI><B>Publications</B>
|
||||
<P></P>
|
||||
Dinesh Simk.
|
||||
Implementing the GF Resource Grammar for Nepali Language,
|
||||
MSc thesis, Chalmers University of Technology,
|
||||
2012.
|
||||
<A HREF="http://publications.lib.chalmers.se/records/fulltext/161384.pdf">http://publications.lib.chalmers.se/records/fulltext/161384.pdf</A>
|
||||
</UL>
|
||||
|
||||
<H3>Norwegian (bokmål)</H3>
|
||||
|
||||
<UL>
|
||||
<LI><B>Source</B>: <A HREF="http://www.grammaticalframework.org/lib/src/norwegian">http://www.grammaticalframework.org/lib/src/norwegian</A> <A HREF="http://www.grammaticalframework.org/lib/src/scandinavian">http://www.grammaticalframework.org/lib/src/scandinavian</A> (Aarne Ranta)
|
||||
</UL>
|
||||
|
||||
<H3>Norwegian (nynorsk)</H3>
|
||||
|
||||
<UL>
|
||||
<LI><B>Source</B>: <A HREF="http://www.grammaticalframework.org/lib/src/nynorsk">http://www.grammaticalframework.org/lib/src/nynorsk</A> (Stian Rødven Eide) <A HREF="http://www.grammaticalframework.org/lib/src/scandinavian">http://www.grammaticalframework.org/lib/src/scandinavian</A> (Aarne Ranta)
|
||||
</UL>
|
||||
|
||||
<H3>Persian</H3>
|
||||
|
||||
<UL>
|
||||
<LI><B>Source</B>: <A HREF="http://www.grammaticalframework.org/lib/src/persian">http://www.grammaticalframework.org/lib/src/persian</A> (Shafqat Virk, Elnaz Abolahrar, Sofy Moradi)
|
||||
<P></P>
|
||||
<LI><B>Publications</B>
|
||||
<P></P>
|
||||
Shafqat Mumtaz Virk and Elnaz Abolahrar.
|
||||
An Open Source Persian Computational Grammar,
|
||||
LREC 2012, pp. 1686-1693.
|
||||
<A HREF="http://www.lrec-conf.org/proceedings/lrec2012/pdf/1028_Paper.pdf">http://www.lrec-conf.org/proceedings/lrec2012/pdf/1028_Paper.pdf</A>
|
||||
</UL>
|
||||
|
||||
<H3>Polish</H3>
|
||||
|
||||
<UL>
|
||||
<LI><B>Source</B>: <A HREF="http://www.grammaticalframework.org/lib/src/polish">http://www.grammaticalframework.org/lib/src/polish</A> (Adam Slaski, Ilona Novak)
|
||||
<P></P>
|
||||
<LI><B>Publications</B>
|
||||
<P></P>
|
||||
Adam Slaski.
|
||||
Some Interesting Features of the Polish Language in Grammatical Framework.
|
||||
Slide presentation, TYPES 2010, Warsaw,
|
||||
2010.
|
||||
<A HREF="http://www.mimuw.edu.pl/~asl/publications/types2010-slides.pdf">http://www.mimuw.edu.pl/~asl/publications/types2010-slides.pdf</A>
|
||||
</UL>
|
||||
|
||||
<H3>Punjabi</H3>
|
||||
|
||||
<UL>
|
||||
<LI><B>Source</B>: <A HREF="http://www.grammaticalframework.org/lib/src/punjabi">http://www.grammaticalframework.org/lib/src/punjabi</A> (Shafqat Virk, Muhammad Humayoun)
|
||||
<P></P>
|
||||
<LI><B>Publications</B>
|
||||
<P></P>
|
||||
S. Virk, M. Humayoun, and A. Ranta.
|
||||
An Open-Source Punjabi Resource Grammar.
|
||||
Proceedings of RANLP-2011, Recent Advances in Natural Language Processing,
|
||||
Hissar, Bulgaria, 12-14 September, 2011.
|
||||
pp. 70-76.
|
||||
<A HREF="http://lml.bas.bg/~iva/ranlp2011/RANLR2011_Proceedings.PDF">http://lml.bas.bg/~iva/ranlp2011/RANLR2011_Proceedings.PDF</A>
|
||||
<P></P>
|
||||
M. Humayoun and A. Ranta.
|
||||
Developing Punjabi Morphology, Corpus and Lexicon.
|
||||
<I>The 24th Pacific Asia conference on Language, Information and Computation (PACLIC24)</I>,
|
||||
2010.
|
||||
</UL>
|
||||
|
||||
<H3>Romanian</H3>
|
||||
|
||||
<UL>
|
||||
<LI><B>Source</B>: <A HREF="http://www.grammaticalframework.org/lib/src/romanian">http://www.grammaticalframework.org/lib/src/romanian</A> (Ramona Enache)
|
||||
<P></P>
|
||||
<LI><B>Publications</B>
|
||||
<P></P>
|
||||
R. Enache, A. Ranta, and K. Angelov.
|
||||
An Open-Source Computational Grammar of Romanian.
|
||||
A. Gelbukh (ed.), <I>CiCLING-2010</I>,
|
||||
LNCS 6008,
|
||||
2010.
|
||||
</UL>
|
||||
|
||||
<H3>Russian</H3>
|
||||
|
||||
<UL>
|
||||
<LI><B>Source</B>: <A HREF="http://www.grammaticalframework.org/lib/src/russian">http://www.grammaticalframework.org/lib/src/russian</A> (Janna Khegai, Nikita Frolov)
|
||||
<P></P>
|
||||
<LI><B>Publications</B>
|
||||
<P></P>
|
||||
J. Khegai.
|
||||
GF parallel resource grammars and Russian.
|
||||
In proceedings of ACL2006
|
||||
(The joint conference of the International Committee on Computational
|
||||
Linguistics and the Association for Computational Linguistics) (pp. 475-482),
|
||||
Sydney, Australia, July 2006.
|
||||
<P></P>
|
||||
J. Khegai and A. Ranta.
|
||||
Building and Using a Russian Resource Grammar in GF.
|
||||
In A. Gelbukh (ed),
|
||||
<I>Intelligent Text Processing and Computational Linguistics (CICLing-2004)</I>,
|
||||
Seoul, Korea, February 2003,
|
||||
Springer LNCS 945,
|
||||
pp. 38-41,
|
||||
2004.
|
||||
</UL>
|
||||
|
||||
<H3>Sindhi</H3>
|
||||
|
||||
<UL>
|
||||
<LI><B>Source</B>: <A HREF="http://www.grammaticalframework.org/lib/src/sindhi">http://www.grammaticalframework.org/lib/src/sindhi</A> (Jherna Devi Oad)
|
||||
<P></P>
|
||||
<LI><B>Publications</B>
|
||||
<P></P>
|
||||
Jherna Devi Oad.
|
||||
Implementing GF Resource Grammar for Sindhi language,
|
||||
MSc Thesis, Chalmers University of Technology,
|
||||
2012.
|
||||
<A HREF="http://publications.lib.chalmers.se/records/fulltext/163234.pdf">http://publications.lib.chalmers.se/records/fulltext/163234.pdf</A>
|
||||
</UL>
|
||||
|
||||
<H3>Spanish</H3>
|
||||
|
||||
<UL>
|
||||
<LI><B>Source</B>: <A HREF="http://www.grammaticalframework.org/lib/src/spanish">http://www.grammaticalframework.org/lib/src/spanish</A> <A HREF="http://www.grammaticalframework.org/lib/src/romance">http://www.grammaticalframework.org/lib/src/romance</A>
|
||||
(Aarne Ranta, Ingrid Andersson, Therese Söderberg, Inari Listenmaa)
|
||||
</UL>
|
||||
|
||||
<H3>Swahili</H3>
|
||||
|
||||
<UL>
|
||||
<LI><B>Source</B>: <A HREF="http://www.grammaticalframework.org/lib/src/swahili">http://www.grammaticalframework.org/lib/src/swahili</A> (Wanjiku Ng'ang'a)
|
||||
</UL>
|
||||
|
||||
<UL>
|
||||
<LI><B>Publications</B>
|
||||
<P></P>
|
||||
Wanjiku Ng'ang'a.
|
||||
Building Swahili Resource Grammars for the Grammatical Framework,
|
||||
In D. Santos, K. Lindén and W. Ng'ang'a (eds),
|
||||
<I>Shall We Play the Festschrift Game?</I>
|
||||
<I>Essays on the Occasion of Lauri Carlson's 60th Birthday</I>.
|
||||
Springer, Heidelberg, 2012.
|
||||
pp. 227-241.
|
||||
<A HREF="http://link.springer.com/chapter/10.1007/978-3-642-30773-7_13">http://link.springer.com/chapter/10.1007/978-3-642-30773-7_13</A>
|
||||
</UL>
|
||||
|
||||
<H3>Swedish</H3>
|
||||
|
||||
<UL>
|
||||
<LI><B>Source</B>: <A HREF="http://www.grammaticalframework.org/lib/src/swedish">http://www.grammaticalframework.org/lib/src/swedish</A> <A HREF="http://www.grammaticalframework.org/lib/src/scandinavian">http://www.grammaticalframework.org/lib/src/scandinavian</A> (Aarne Ranta, Malin Ahlberg, Markus Forsberg)
|
||||
<P></P>
|
||||
<LI><B>Publications</B>
|
||||
<P></P>
|
||||
Malin Ahlberg.
|
||||
<I>Towards a Wide-Coverage Grammar for Swedish Using GF</I>,
|
||||
MSc thesis, University of Gothenburg,
|
||||
2012.
|
||||
<A HREF="https://gupea.ub.gu.se/bitstream/2077/28838/1/gupea_2077_28838_1.pdf">https://gupea.ub.gu.se/bitstream/2077/28838/1/gupea_2077_28838_1.pdf</A>
|
||||
<P></P>
|
||||
Malin Ahlberg and Ramona Enache.
|
||||
A Type-Theoretical Wide-Coverage Computational Grammar for Swedish.
|
||||
P. Sojka et al (eds), TSD 2012, LNCS 7499, pp. 183-190.
|
||||
<A HREF="http://link.springer.com/content/pdf/10.1007%2F978-3-642-32790-2_22.pdf">http://link.springer.com/content/pdf/10.1007%2F978-3-642-32790-2_22.pdf</A>
|
||||
</UL>
|
||||
|
||||
<H3>Thai</H3>
|
||||
|
||||
<UL>
|
||||
<LI><B>Source</B>: <A HREF="http://www.grammaticalframework.org/lib/src/thai">http://www.grammaticalframework.org/lib/src/thai</A> (Aarne Ranta, Chotiros Kairoje)
|
||||
</UL>
|
||||
|
||||
<H3>Turkish</H3>
|
||||
|
||||
<UL>
|
||||
<LI><B>Source</B>: <A HREF="http://www.grammaticalframework.org/lib/src/turkish">http://www.grammaticalframework.org/lib/src/turkish</A> (Server Cimen)
|
||||
</UL>
|
||||
|
||||
<H3>Urdu</H3>
|
||||
|
||||
<UL>
|
||||
<LI><B>Source</B>: <A HREF="http://www.grammaticalframework.org/lib/src/urdu">http://www.grammaticalframework.org/lib/src/urdu</A> <A HREF="http://www.grammaticalframework.org/lib/src/hindustani">http://www.grammaticalframework.org/lib/src/hindustani</A> (Shafqat Virk, Muhammad Humayoun)
|
||||
<P></P>
|
||||
<LI><B>Publications</B>
|
||||
<P></P>
|
||||
S. Virk, M. Humayoun, and A. Ranta.
|
||||
An Open Source Urdu Resource Grammar.
|
||||
<I>Proceedings of the 8th Workshop on Asian Language Resources (Coling 2010 workshop)</I>,
|
||||
2010.
|
||||
<P></P>
|
||||
M. Humayoun, H. Hammarström, and A. Ranta.
|
||||
Urdu Morphology, Orthography and Lexicon Extraction.
|
||||
<I>CAASL-2: The Second Workshop on Computational Approaches to Arabic Script-based Languages</I>,
|
||||
July 21-22, 2007, LSA 2007 Linguistic Institute, Stanford University.
|
||||
2007.
|
||||
<P></P>
|
||||
See also <B>Hindi</B> above.
|
||||
</UL>
|
||||
|
||||
<!-- html code generated by txt2tags 2.6 (http://txt2tags.org) -->
|
||||
<!-- cmdline: txt2tags -thtml rgl-publications.txt -->
|
||||
</BODY></HTML>
|
||||
862
doc/status.html
862
doc/status.html
@@ -1,862 +0,0 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
||||
<HTML>
|
||||
<HEAD>
|
||||
<META NAME="generator" CONTENT="http://txt2tags.org">
|
||||
<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=utf8">
|
||||
<TITLE>The Status of the GF Resource Grammar Library</TITLE>
|
||||
</HEAD><BODY BGCOLOR="white" TEXT="black">
|
||||
<CENTER>
|
||||
<H1>The Status of the GF Resource Grammar Library</H1>
|
||||
<FONT SIZE="4"><I>Aarne Ranta</I></FONT><BR>
|
||||
<FONT SIZE="4">20170119</FONT>
|
||||
</CENTER>
|
||||
|
||||
<P>
|
||||
The following table gives the languages currently available in the
|
||||
GF Resource Grammar Library.
|
||||
</P>
|
||||
<P>
|
||||
For another view, see
|
||||
<A HREF="http://www.postcrashgames.com/gf_world/">The Resource Grammar Library coverage map</A>.
|
||||
</P>
|
||||
<P>
|
||||
Corrections and additions are welcome! Notice that only those parts of implementations
|
||||
that are currently available via <A HREF="http://grammaticalframework.org">http://grammaticalframework.org</A>
|
||||
are marked in the table.
|
||||
</P>
|
||||
|
||||
<TABLE BORDER="1" CELLPADDING="4">
|
||||
<TR>
|
||||
<TH>ISO</TH>
|
||||
<TH>Language</TH>
|
||||
<TH>Darcs</TH>
|
||||
<TH>Mini</TH>
|
||||
<TH>Parad</TH>
|
||||
<TH>Lex</TH>
|
||||
<TH>Lang</TH>
|
||||
<TH>API</TH>
|
||||
<TH>Symb</TH>
|
||||
<TH>Irreg</TH>
|
||||
<TH>Dict</TH>
|
||||
<TH>Trans</TH>
|
||||
<TH>tested</TH>
|
||||
<TH>publ</TH>
|
||||
<TH COLSPAN="2">authors</TH>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>Afr</TD>
|
||||
<TD>Afrikaans</TD>
|
||||
<TD>+</TD>
|
||||
<TD>-</TD>
|
||||
<TD>++</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>*LP,LM</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>Amh</TD>
|
||||
<TD>Amharic</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>++</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>+</TD>
|
||||
<TD>*MK</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>Ara</TD>
|
||||
<TD>Arabic</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>+</TD>
|
||||
<TD>AD</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>Bul</TD>
|
||||
<TD>Bulgarian</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>++</TD>
|
||||
<TD>+</TD>
|
||||
<TD>*KA</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>Cat</TD>
|
||||
<TD>Catalan</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>++</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>-</TD>
|
||||
<TD>+</TD>
|
||||
<TD>++</TD>
|
||||
<TD>-</TD>
|
||||
<TD>*JS,*IL</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>Chi</TD>
|
||||
<TD>Chinese</TD>
|
||||
<TD>+</TD>
|
||||
<TD>-</TD>
|
||||
<TD>++</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>-</TD>
|
||||
<TD>+</TD>
|
||||
<TD>ZL,*AR,*CP,QH</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>Dan</TD>
|
||||
<TD>Danish</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>++</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>+</TD>
|
||||
<TD>-</TD>
|
||||
<TD>*AR</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>Dut</TD>
|
||||
<TD>Dutch</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>++</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>-</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>-</TD>
|
||||
<TD>*AR,FJ</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>Eng</TD>
|
||||
<TD>English</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>++</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>++</TD>
|
||||
<TD>+</TD>
|
||||
<TD>*AR,BB,KA</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>Est</TD>
|
||||
<TD>Estonian</TD>
|
||||
<TD>+</TD>
|
||||
<TD>-</TD>
|
||||
<TD>++</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>*KK,*IL</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>Fin</TD>
|
||||
<TD>Finnish</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>++</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>-</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>++</TD>
|
||||
<TD>+</TD>
|
||||
<TD>*AR,*IL</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>Fre</TD>
|
||||
<TD>French</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>++</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>++</TD>
|
||||
<TD>-</TD>
|
||||
<TD>*AR,RE</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>Ger</TD>
|
||||
<TD>German</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>++</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>++</TD>
|
||||
<TD>-</TD>
|
||||
<TD>*AR,HH,EG</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>Gre</TD>
|
||||
<TD>Greek(mod)</TD>
|
||||
<TD>+</TD>
|
||||
<TD>-</TD>
|
||||
<TD>++</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>+</TD>
|
||||
<TD>*IP</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>Grc</TD>
|
||||
<TD>Greek(anc)</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>+</TD>
|
||||
<TD>*HLe</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>Heb</TD>
|
||||
<TD>Hebrew</TD>
|
||||
<TD>+</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>+</TD>
|
||||
<TD>*DD</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>Hin</TD>
|
||||
<TD>Hindi</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>++</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>*SV,*KP,MH,AR,PK</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>Ice</TD>
|
||||
<TD>Icelandic</TD>
|
||||
<TD>+</TD>
|
||||
<TD>-</TD>
|
||||
<TD>++</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>+</TD>
|
||||
<TD>*BT</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>Ina</TD>
|
||||
<TD>Interlingua</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>++</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>JB</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>Ita</TD>
|
||||
<TD>Italian</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>++</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>+</TD>
|
||||
<TD>++</TD>
|
||||
<TD>-</TD>
|
||||
<TD>*AR,*RE,GP</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>Jpn</TD>
|
||||
<TD>Japanese</TD>
|
||||
<TD>+</TD>
|
||||
<TD>-</TD>
|
||||
<TD>++</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>*LZ</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>Lat</TD>
|
||||
<TD>Latin</TD>
|
||||
<TD>+</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>+</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>*AR,*HLa</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>Lav</TD>
|
||||
<TD>Latvian</TD>
|
||||
<TD>+</TD>
|
||||
<TD>-</TD>
|
||||
<TD>++</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>*NG,*PP</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>Mlt</TD>
|
||||
<TD>Maltese</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>++</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>+</TD>
|
||||
<TD>*JC</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>Mon</TD>
|
||||
<TD>Mongolian</TD>
|
||||
<TD>+</TD>
|
||||
<TD>-</TD>
|
||||
<TD>++</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>+</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>+</TD>
|
||||
<TD>*NE</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>Nep</TD>
|
||||
<TD>Nepali</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>++</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>+</TD>
|
||||
<TD>*DS</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>Nno</TD>
|
||||
<TD>Norwegian(n)</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>++</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>*SRE</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>Nor</TD>
|
||||
<TD>Norwegian(b)</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>++</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>+</TD>
|
||||
<TD>-</TD>
|
||||
<TD>*AR</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>Pes</TD>
|
||||
<TD>Persian</TD>
|
||||
<TD>+</TD>
|
||||
<TD>-</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>*SV,*EA,SM</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>Pnb</TD>
|
||||
<TD>Punjabi</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>+</TD>
|
||||
<TD>*SV,MH</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>Pol</TD>
|
||||
<TD>Polish</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>IN,*AS</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>Ron</TD>
|
||||
<TD>Romanian</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>++</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>*RE</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>Rus</TD>
|
||||
<TD>Russian</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>++</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>-</TD>
|
||||
<TD>+</TD>
|
||||
<TD>JK,*NF</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>Snd</TD>
|
||||
<TD>Sindhi</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>++</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>+</TD>
|
||||
<TD>*SV,*JD</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>Spa</TD>
|
||||
<TD>Spanish</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>++</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>-</TD>
|
||||
<TD>+</TD>
|
||||
<TD>++</TD>
|
||||
<TD>-</TD>
|
||||
<TD>*AR,IA,TS,*IL</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>Swa</TD>
|
||||
<TD>Swahili</TD>
|
||||
<TD>+</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>+</TD>
|
||||
<TD>*WN,JM</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>Swe</TD>
|
||||
<TD>Swedish</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>++</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>++</TD>
|
||||
<TD>+</TD>
|
||||
<TD>*MA,*AR,MF</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>Tha</TD>
|
||||
<TD>Thai</TD>
|
||||
<TD>+</TD>
|
||||
<TD>-</TD>
|
||||
<TD>++</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>-</TD>
|
||||
<TD>*AR,CK</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>Tsn</TD>
|
||||
<TD>Tswana</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>*LPs,AB</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>Tur</TD>
|
||||
<TD>Turkish</TD>
|
||||
<TD>+</TD>
|
||||
<TD>-</TD>
|
||||
<TD>++</TD>
|
||||
<TD>+</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>+</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>*SC,KA</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>Urd</TD>
|
||||
<TD>Urdu</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>++</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>-</TD>
|
||||
<TD>+</TD>
|
||||
<TD>+</TD>
|
||||
<TD>*SV,MH</TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
|
||||
<P>
|
||||
ISO = 3-letter ISO language code, used in library file names
|
||||
(mostly ISO 639-2 B (bibliographic))
|
||||
</P>
|
||||
<P>
|
||||
Darcs = available in the darcs repository of <S><A HREF="http://code.haskell.org/gf">http://code.haskell.org/gf</A></S> <A HREF="http://www.grammaticalframework.org/">http://www.grammaticalframework.org/</A>
|
||||
</P>
|
||||
<P>
|
||||
Mini = minimal resource, compiles with <CODE>make minimal</CODE> (obsolete)
|
||||
</P>
|
||||
<P>
|
||||
Parad = <CODE>Paradigms</CODE> file complete for major POS, ++ means with smart paradigms
|
||||
</P>
|
||||
<P>
|
||||
Lex = the resource <CODE>Lexicon</CODE> (nearly) complete
|
||||
</P>
|
||||
<P>
|
||||
Lang = the resource <CODE>Syntax</CODE> (nearly) complete
|
||||
</P>
|
||||
<P>
|
||||
API = the <CODE>Syntax</CODE> API compiles
|
||||
</P>
|
||||
<P>
|
||||
API = the <CODE>Symbolic</CODE> API compiles
|
||||
</P>
|
||||
<P>
|
||||
Irreg = the <CODE>Irreg</CODE> module with irregular verbs exists
|
||||
</P>
|
||||
<P>
|
||||
Dict = the <CODE>Dict</CODE> module, large-scale morphological lexicon, exists
|
||||
</P>
|
||||
<P>
|
||||
Trans = large-scale translation module and dictionary exists
|
||||
</P>
|
||||
<P>
|
||||
tested = tested in some applications, ++ means extensively tested with no major issues
|
||||
</P>
|
||||
<P>
|
||||
publ = publications available, see <A HREF="./rgl-publications.html">RGL publication list</A>
|
||||
</P>
|
||||
<P>
|
||||
authors = main contributors, * means still active
|
||||
(ready to fix bugs, answer questions, etc.)
|
||||
</P>
|
||||
|
||||
<H3>Author codes</H3>
|
||||
|
||||
<P>
|
||||
AB Ansu Berg,
|
||||
AD Ali El Dada,
|
||||
AR Aarne Ranta,
|
||||
AS Adam Slaski,
|
||||
BB Björn Bringert,
|
||||
BT Bjarki Traustason,
|
||||
CK Chotiros Kairoje,
|
||||
CP Chen Peng,
|
||||
DD Dana Dannélls,
|
||||
DS Dinesh Simk,
|
||||
EA Elnaz Abolahrar,
|
||||
EG Erzsébet Galgóczy,
|
||||
FJ Femke Johansson,
|
||||
HH Harald Hammarström,
|
||||
HLa Herbert Lange,
|
||||
HLe Hans Leiss,
|
||||
GP Gabriele Paganelli,
|
||||
IA Ingrid Andersson,
|
||||
IL Inari Listenmaa,
|
||||
IN Ilona Novak,
|
||||
IP Ioanna Papadopoulou,
|
||||
JB Jean-Philippe Bernardy,
|
||||
JC John J. Camilleri,
|
||||
JD Jherna Devi,
|
||||
JK Janna Khegai,
|
||||
JM Juliet Mutahi,
|
||||
JS Jordi Saludes,
|
||||
KA Krasimir Angelov,
|
||||
KK Kaarel Kaljurand,
|
||||
KP Kuchi Prasad,
|
||||
LM Laurette Marais,
|
||||
LP Laurette Pretorius,
|
||||
LZ Liza Zimina,
|
||||
MA Malin Ahlberg,
|
||||
MF Markus Forsberg,
|
||||
MK Markos Kassa Gobena,
|
||||
MH Muhammad Humayoun,
|
||||
NE Nyamsuren Erdenebadrakh,
|
||||
NF Nick Frolov,
|
||||
NG Normunds Gruzitis,
|
||||
QH Qiao Haiyan,
|
||||
RE Ramona Enache,
|
||||
PP Peteris Paikens,
|
||||
SC Server Cimen,
|
||||
SM Sofy Moradi,
|
||||
SRE Stian Rødven Eide,
|
||||
SV Shafqat Virk,
|
||||
TS Therese Söderberg,
|
||||
WN Wanjiku Ng'ang'a,
|
||||
ZL Zhuo Lin Qiqige
|
||||
</P>
|
||||
|
||||
<H2>Rules</H2>
|
||||
|
||||
<P>
|
||||
Only components available at <A HREF="http://grammaticalframework.org">http://grammaticalframework.org</A> are indicated in the table
|
||||
(exceptions: Ancient Greek, Mongolian, to appear soon).
|
||||
</P>
|
||||
<P>
|
||||
If you want to work on a language already in the table, please be kind and contact the
|
||||
active authors of it.
|
||||
</P>
|
||||
<P>
|
||||
Feel free to start a new language that is not yet in the table - but let us know and
|
||||
contribute some code as soon as you can!
|
||||
</P>
|
||||
|
||||
<!-- html code generated by txt2tags 2.6 (http://txt2tags.org) -->
|
||||
<!-- cmdline: txt2tags -thtml status.txt -->
|
||||
</BODY></HTML>
|
||||
5
doc/.gitignore → doc/synopsis/.gitignore
vendored
5
doc/.gitignore → doc/synopsis/.gitignore
vendored
@@ -1,5 +1,6 @@
|
||||
index.txt
|
||||
index.html
|
||||
api-examples-*.txt
|
||||
api-examples.gfs
|
||||
categories-imagemap.html
|
||||
synopsis.txt
|
||||
synopsis.html
|
||||
categories.png
|
||||
67
doc/synopsis/Makefile
Normal file
67
doc/synopsis/Makefile
Normal file
@@ -0,0 +1,67 @@
|
||||
# Your GF_LIB_PATH must be set in order for this build script to work
|
||||
|
||||
.PHONY: all index clean
|
||||
|
||||
GF_alltenses=$(GF_LIB_PATH)/alltenses
|
||||
GF=gf
|
||||
GFDOC=gfdoc
|
||||
|
||||
ROOT=../..
|
||||
S=$(ROOT)/src
|
||||
CONFIG=$(ROOT)/languages.csv
|
||||
|
||||
# List of languages extracted from languages.csv, with 'Synopsis' column == y.
# Simply expanded (:=) so the shell pipeline runs once at parse time instead of
# on every expansion of LANGS (and of SRC_FILES / EXAMPLES_OUT, which use it).
|
||||
LANGS:=$(shell cut -d',' -f1,10 $(CONFIG) | grep ',y' | cut -d',' -f1)
|
||||
|
||||
# This list was constructed by observing what files MkSynopsis.hs reads
|
||||
SRC_FILES=$(S)/abstract/Common.gf $(S)/abstract/Cat.gf $(S)/api/Constructors.gf $(S)/abstract/Structural.gf $(patsubst %,$S/*/Paradigms%.gf,$(LANGS))
|
||||
|
||||
EXAMPLES_OUT=$(patsubst %,api-examples-%.txt,$(LANGS))
|
||||
INCLUDES=intro.txt categories-intro.txt categories-imagemap.html additional.txt browse.txt example.txt
|
||||
|
||||
TMP=tmp.html
|
||||
TEMPLATE=template.html
|
||||
|
||||
all: index
|
||||
|
||||
index: index.html
|
||||
|
||||
index.txt: MkSynopsis.hs MkExxTable.hs $(INCLUDES) $(EXAMPLES_OUT) $(SRC_FILES)
|
||||
runghc -i$(ROOT) MkSynopsis.hs
|
||||
|
||||
index.html: index.txt $(TEMPLATE)
|
||||
txt2tags --target=html --no-headers --quiet --toc --outfile=$@ --infile=$<
|
||||
pandoc \
|
||||
--from=html \
|
||||
--to=html5 \
|
||||
--standalone \
|
||||
--template=$(TEMPLATE) \
|
||||
--css=synopsis.css \
|
||||
--metadata='title:"GF Resource Grammar Library: Synopsis"' \
|
||||
--variable='rel-root:$(ROOT)/..' \
|
||||
--output=$(TMP) \
|
||||
$@
|
||||
mv $(TMP) $@
|
||||
|
||||
categories.png: categories.dot
|
||||
dot -Tpng $^ > $@
|
||||
|
||||
categories-imagemap.html: categories.dot
|
||||
dot -Tcmapx $^ > $@
|
||||
|
||||
api-examples.gfs: api-examples.txt MkExx.hs
|
||||
runghc MkExx.hs < $< > $@
|
||||
|
||||
# Since .gfo files aren't self-contained, the dependencies given here are
|
||||
# incomplete. But I am thinking that the Try%.gfo file will always be newer
|
||||
# than any other files it depends on, so the rule will trigger when
|
||||
# needed anyway. //TH 2018-10-22
|
||||
api-examples-%.txt: $(GF_alltenses)/Try%.gfo api-examples.gfs
|
||||
GF_LIB_PATH=$(GF_LIB_PATH) $(GF) -retain -s $< <api-examples.gfs >$@
|
||||
|
||||
clean:
|
||||
rm -rf \
|
||||
index.txt \
|
||||
index.html \
|
||||
api-examples.gfs \
|
||||
$(EXAMPLES_OUT)
|
||||
@@ -6,14 +6,17 @@ import Data.Char
|
||||
import Data.List
|
||||
import qualified Data.Map as M
|
||||
import Text.Printf
|
||||
import Config
|
||||
import Config (loadLangsFrom, LangInfo (..))
|
||||
import qualified Config
|
||||
|
||||
type Cats = [(String,String,String)]
|
||||
type Rules = [(String,String,String)]
|
||||
|
||||
-- the file generated
|
||||
synopsis :: FilePath
|
||||
synopsis = "synopsis.txt"
|
||||
outfile :: FilePath
|
||||
outfile = "index.txt"
|
||||
|
||||
configFile = ".." </> ".." </> Config.configFile
|
||||
|
||||
-- the language in which revealed examples are shown
|
||||
revealedLang :: String
|
||||
@@ -22,7 +25,7 @@ revealedLang = "Eng"
|
||||
-- all languages shown (a copy of this list appears in Makefile)
|
||||
apiExxFiles :: IO [FilePath]
|
||||
apiExxFiles = do
|
||||
langs <- loadLangsFrom (".." </> configFile)
|
||||
langs <- loadLangsFrom configFile
|
||||
return $
|
||||
[ "api-examples-" ++ (langCode lang) ++ ".txt"
|
||||
| lang <- langs
|
||||
@@ -35,7 +38,7 @@ main = do
|
||||
cs1 <- getCats commonAPI
|
||||
cs2 <- getCats catAPI
|
||||
let cs = sortCats (cs1 ++ cs2)
|
||||
writeFile synopsis "GF Resource Grammar Library: Synopsis"
|
||||
writeFile outfile "GF Resource Grammar Library: Synopsis"
|
||||
space
|
||||
append "%!Encoding:utf-8"
|
||||
append "%!style(html): ./revealpopup.css"
|
||||
@@ -50,7 +53,7 @@ main = do
|
||||
append "%!postproc(html): '#LParadigms' '<a name=\"RParadigms\"></a>'"
|
||||
append "%!postproc(tex): '#LParadigms' ''"
|
||||
delimit $ addToolTips cs
|
||||
include "synopsis-intro.txt" -- TODO dynamic language list
|
||||
include "intro.txt" -- TODO dynamic language list
|
||||
title "Categories"
|
||||
space
|
||||
link "Source 1:" commonAPI
|
||||
@@ -87,13 +90,13 @@ main = do
|
||||
title "Lexical Paradigms"
|
||||
paradigmFiles >>= mapM_ (putParadigms cs)
|
||||
space
|
||||
include "synopsis-additional.txt"
|
||||
include "additional.txt"
|
||||
space
|
||||
include "synopsis-browse.txt"
|
||||
include "browse.txt"
|
||||
space
|
||||
title "An Example of Usage"
|
||||
space
|
||||
include "synopsis-example.txt"
|
||||
include "example.txt"
|
||||
space
|
||||
title "Table of Contents"
|
||||
space
|
||||
@@ -232,7 +235,7 @@ mkCatTable cs = inChunks chsize (\rs -> header ++ map mk1 rs) cs
|
||||
mk1 (name,expl,ex) = unwords ["|", showCat cs name, "|", expl, "|", typo ex, "|"]
|
||||
typo ex = if take 1 ex == "\"" then itf (init (tail ex)) else ex
|
||||
|
||||
srcPath = ((</>) "../src")
|
||||
srcPath = ((</>) "../../src")
|
||||
|
||||
commonAPI = srcPath "abstract/Common.gf"
|
||||
catAPI = srcPath "abstract/Cat.gf"
|
||||
@@ -241,7 +244,7 @@ structuralAPI = srcPath "abstract/Structural.gf"
|
||||
|
||||
paradigmFiles :: IO [(String,FilePath)]
|
||||
paradigmFiles = do
|
||||
langs <- loadLangsFrom (".." </> configFile)
|
||||
langs <- loadLangsFrom configFile
|
||||
return $
|
||||
[ (name, srcPath $ printf "%s/Paradigms%s.gf" (langDir lang) (langCode lang))
|
||||
| lang <- langs
|
||||
@@ -263,7 +266,7 @@ splitOn f s = takeWhile (not.f) s : splitOn f rest
|
||||
"" -> []
|
||||
_:xs -> xs
|
||||
|
||||
append s = appendFile synopsis ('\n':s)
|
||||
append s = appendFile outfile ('\n':s)
|
||||
title s = append $ "=" ++ s ++ "="
|
||||
stitle s = append $ "==" ++ s ++ "=="
|
||||
include s = append $ "%!include: " ++ s
|
||||
@@ -1,329 +0,0 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
||||
<HTML>
|
||||
<HEAD>
|
||||
<META NAME="generator" CONTENT="http://txt2tags.org">
|
||||
<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=utf8">
|
||||
<TITLE>From Resource Grammar to Wide Coverage Translation with GF</TITLE>
|
||||
</HEAD><BODY BGCOLOR="white" TEXT="black">
|
||||
<CENTER>
|
||||
<H1>From Resource Grammar to Wide Coverage Translation with GF</H1>
|
||||
<FONT SIZE="4"><I>Aarne Ranta et al.</I></FONT><BR>
|
||||
<FONT SIZE="4">January-May 2014</FONT>
|
||||
</CENTER>
|
||||
|
||||
|
||||
<H2>Scope</H2>
|
||||
|
||||
<P>
|
||||
Wide-coverage interlingual translator for
|
||||
Bulgarian, Chinese, Dutch, English, Finnish, French, German,
|
||||
Hindi, Italian, Spanish, Swedish.
|
||||
</P>
|
||||
|
||||
<H2>How to use it</H2>
|
||||
|
||||
<P>
|
||||
If you just want to try it before reading more,
|
||||
here are the main ways to get started:
|
||||
</P>
|
||||
<P>
|
||||
1. <B>Run on our server.</B> <A HREF="http://www.grammaticalframework.org/demos/translation.html">http://www.grammaticalframework.org/demos/translation.html</A>
|
||||
</P>
|
||||
<P>
|
||||
2. <B>Get an Android app.</B> <A HREF="http://www.grammaticalframework.org/demos/app.html">http://www.grammaticalframework.org/demos/app.html</A>
|
||||
</P>
|
||||
<P>
|
||||
3. <B>Compile and run in the shell.</B> Get the latest GF sources (with darcs or from GitHub) and then
|
||||
</P>
|
||||
|
||||
<UL>
|
||||
<LI>compile and install the GF compiler and library and the C runtime (<CODE>pgf-translate</CODE>).
|
||||
<P></P>
|
||||
<LI>compile the translator:
|
||||
|
||||
<PRE>
|
||||
cd GF/lib/src
|
||||
make -j Translate11.pgf
|
||||
</PRE>
|
||||
|
||||
This will take a long time (fifteen minutes or more) and will probably require at least 8GB of RAM.
|
||||
<P></P>
|
||||
<LI>run the translator
|
||||
|
||||
<PRE>
|
||||
pgf-translate Translate11.pgf Phr TranslateEng TranslateSwe
|
||||
</PRE>
|
||||
|
||||
where the source and target languages can of course be varied.
|
||||
</UL>
|
||||
|
||||
<P>
|
||||
4. To modify the sources, work on the files in
|
||||
</P>
|
||||
|
||||
<PRE>
|
||||
GF/lib/src/translator/
|
||||
</PRE>
|
||||
|
||||
<P>
|
||||
It is these files that will be explained below.
|
||||
</P>
|
||||
|
||||
<H2>GF and the RGL</H2>
|
||||
|
||||
<P>
|
||||
GF, Grammatical Framework, was originally designed for the purpose of <B>multilingual controlled language systems</B>,
|
||||
which would enable high-quality translation on limited domains. The <B>abstract syntax</B> of GF defines the semantic
|
||||
structures relevant for the domain, and the <B>concrete syntaxes</B> map these structures to grammatically correct
|
||||
and idiomatic text in each target language. The <B>reversibility</B> of GF enables both <B>generation</B> and <B>parsing</B>,
|
||||
and thereby <B>translation</B> where the abstract syntax functions as an <B>interlingua</B>.
|
||||
</P>
|
||||
<P>
|
||||
A bottleneck of GF applications soon became apparent: the definition of concrete syntax requires a lot
|
||||
of manual work and linguistic skill, because of the complexities of natural language syntax and morphology. Some of
|
||||
the complexities can be ignored in a small system. For instance, in a mathematical system, it may be enough to
|
||||
use verbs in the present tense only. But very much the same linguistic problems must be solved again and again
|
||||
in new applications: French verb inflection is the same in mathematics as in a tourist phrasebook. To solve
|
||||
this problem, the <B>GF Resource Grammar Library</B> (RGL) was developed, to take care of "low-level" linguistic
|
||||
rules such as inflection, agreement, and word order. This enables the authors of <B>application grammars</B> to focus
|
||||
on the semantics (when designing the abstract syntax) and on selecting RGL functions that produce the idioms they
|
||||
want. The RGL grew into an international open-source project, where more than 50 persons have contributed to
|
||||
implementing it for 29 languages by the time of writing this.
|
||||
</P>
|
||||
|
||||
<H2>Scaling up GF translation</H2>
|
||||
|
||||
<P>
|
||||
The RGL was thus originally designed to be used just as its name says: as a library
|
||||
for application grammars. Only the latter were meant to be used as <B>top-level grammars</B>, i.e. for
|
||||
parsing, generation, and translation at run time. Little attention was therefore
|
||||
paid to the usability of RGL as a top-level
|
||||
grammar by itself. But when applications accumulated, ranging from technical text to spoken dialogue, the coverage
|
||||
of the RGL grew into a coverage that approximates a "complete grammar" of many of the languages.
|
||||
And recently, there has indeed been success in using the RGL as a wide-coverage translation grammar,
|
||||
mainly due to Krasimir Angelov's efforts to scale up the size of GF applications from language fragments
|
||||
to open-text processing. This success is a result of four lines of development:
|
||||
</P>
|
||||
|
||||
<UL>
|
||||
<LI><B>More efficient processing</B>, both due to better algorithms and to an optimized C implementation of a PGF
|
||||
interpreter, the <B>C runtime</B>, achieving speeds competitive with the state of the art, e.g. the Stanford parser.
|
||||
This development is also based on the work of Peter Ljunglöf on GF parsing and Lauri Alanko on the C runtime.
|
||||
<P></P>
|
||||
<LI><B>Large-scale dictionaries</B>, both manually built and extracted from free sources, and linked into a multilingual
|
||||
translation dictionary now covering 10k to 60k entries for eleven languages. This work was started by Björn Bringert,
|
||||
who ported the Oxford Advanced Learner's Dictionary of English to GF.
|
||||
<P></P>
|
||||
<LI><B>Probabilistic disambiguation</B>, using a model trained from the Penn Treebank. Due to the common abstract syntax,
|
||||
the same model can be used for other languages as well, even though the adequacy of this transfer has not
|
||||
been systematically evaluated.
|
||||
<P></P>
|
||||
<LI><B>Robust parsing</B>, which recovers from unknown words and syntax
|
||||
by using chunk-by-chunk translations. This leads to loss of quality, but fulfills the principle that
|
||||
"something is better than nothing".
|
||||
</UL>
|
||||
|
||||
<H2>Remaining problems</H2>
|
||||
|
||||
<P>
|
||||
The result of all this work is a wide-coverage translation system, which can be used in the same way as Google
|
||||
Translate, Bing, Systran, and Apertium - to "translate anything", albeit with a varying quality. At the moment of
|
||||
writing, the performance is not yet generally on a level with the best of the competition, but shows some promising
|
||||
improvements in e.g. long-distance agreement and word order. To turn these advantages into absolute improvements, we
|
||||
will need to fix problems that the other systems (or at least some of them) get right but where GF translation
|
||||
often fails:
|
||||
</P>
|
||||
|
||||
<UL>
|
||||
<LI><B>Lexical coverage</B>, to eliminate parsing failures due to unknown words.
|
||||
<P></P>
|
||||
<LI><B>Disambiguation</B>, with more sophisticated models than the essentially context-free tree model used now.
|
||||
<P></P>
|
||||
<LI><B>Speed</B>, which gets worse with long sentences and with more complex languages.
|
||||
<P></P>
|
||||
<LI><B>Idiomacy</B>, due to the lack of idiomatic constructions that are not compositional and therefore don't come out right
|
||||
in the RGL but are often correct in phrase-based SMT.
|
||||
</UL>
|
||||
|
||||
<H2>Advantages of GF translation</H2>
|
||||
|
||||
<P>
|
||||
Given that these issues get resolved, the strengths of the GF approach can be made more visible:
|
||||
</P>
|
||||
|
||||
<UL>
|
||||
<LI><B>Grammaticality</B>, in particular the already mentioned issues of agreement and word order.
|
||||
<P></P>
|
||||
<LI><B>Predictability</B>, in the sense that a local change in the input usually results in a corresponding
|
||||
local change in the output (unless otherwise required by idiomacy).
|
||||
<P></P>
|
||||
<LI><B>Feedback</B>, i.e. the ease of showing the confidence level of the translation, alternative translations,
|
||||
and linguistic information.
|
||||
<P></P>
|
||||
<LI><B>Adaptability</B>, i.e. the ease of fixing bugs, adapting the system to special domains, and personalizing it.
|
||||
This can be done with great precision. For instance, a bug in a grammar can be fixed without
|
||||
breaking anything else.
|
||||
<P></P>
|
||||
<LI><B>Light weight</B>. The system runs on standard laptops and even on mobile phones; the size of the run-time
|
||||
system for all pairs of 11 languages is under 25MB (on the Android platform), and recompiling the whole
|
||||
system (e.g. after bug fixes or
|
||||
domain adaptation) is a matter of a few minutes, where corresponding figures for SMT systems are gigabytes of size
|
||||
and days of retraining.
|
||||
<P></P>
|
||||
<LI><B>Multilinguality</B>, in the sense that once the parsing of the input is settled, the output can be readily
|
||||
rendered into all other languages,
|
||||
and also in the sense that the GF model works equally well for any language pair.
|
||||
</UL>
|
||||
|
||||
<H2>Wanted: more work, new ideas</H2>
|
||||
|
||||
<P>
|
||||
The recipes for improvement are, as always, <B>more work</B> and <B>new ideas</B>. Each of the four weaknesses mentioned
|
||||
above can be relieved by more work - in particular, lexical coverage by more work on the lexicon, since
|
||||
automatic extraction methods cannot really be trusted. As for disambiguation, new ideas about probabilistic
|
||||
tree models are being discussed. As for speed, new ideas on parsing (in particular, the integration of disambiguation
|
||||
with parsing) would help, but also the complexity of grammatical structures plays a major role. As for idiomacy,
|
||||
more work is being done in introducing <B>constructions</B> (non-compositional syntax rules, generalizing the notion of
|
||||
<B>multiword expressions</B>, in particular, <B>phrases</B> in SMT), but also new ideas are being discussed on how to
|
||||
extract such constructions from e.g. phrase tables.
|
||||
</P>
|
||||
<P>
|
||||
In the following, we will focus on describing the role of grammar in the GF translation system - in particular, how
|
||||
RGL can be modified to become usable as a top-level grammar for translating open text.
|
||||
As RGL was not meant to be used for parsing open text, but rather for the controlled language generation task,
|
||||
it has serious restrictions:
|
||||
</P>
|
||||
|
||||
<UL>
|
||||
<LI><B>Limited coverage</B>. The RGL does not cover all structures in any language - hence it is likely to fail when
|
||||
parsing unlimited text.
|
||||
<P></P>
|
||||
<LI><B>Semantic overgeneration</B>. Semantic distinctions, such as between mass and count nouns, or place and manner
|
||||
adverbials, are assumed to be defined in application grammars; the RGL just defines the combinatorics of
|
||||
elements, but doesn't prescribe which elements can really go together.
|
||||
<P></P>
|
||||
<LI><B>Spurious ambiguities</B>. RGL parsing creates more ambiguities than would be necessary if there
|
||||
was more semantic control. In addition, there are partly overlapping structures, which generate
|
||||
spurious syntactic ambiguities.
|
||||
<B>Example</B>: the very liberal apposition function.
|
||||
<P></P>
|
||||
<LI><B>Inefficiency</B>. Partly because of ambiguities, partly because of the deep nesting and complex data structures, parsing
|
||||
with the RGL can be very slow when compared to application grammars, even the comprehensive ResourceDemo grammar.
|
||||
For some languages (Romanian, versions of French and Finnish), parsing is not practically possible at all because
|
||||
PGF generation fails for memory reasons.
|
||||
<P></P>
|
||||
<LI><B>Syntax orientation</B>. The structures of the RGL are rather superficial and don't guarantee translation
|
||||
equivalence when used as interlingua.
|
||||
<P></P>
|
||||
<LI><B>Coarse categories</B>. This is a particular aspect of syntax orientation, and causes at the same time overgeneration
|
||||
and spurious ambiguities.
|
||||
<B>Example</B>: the category <CODE>Adv</CODE>.
|
||||
</UL>
|
||||
|
||||
<H2>What speaks for using RGL</H2>
|
||||
|
||||
<P>
|
||||
Despite these problems, the RGL has proven to be a possible starting point for large-scale translation. It has a couple
|
||||
of advantages speaking for this:
|
||||
</P>
|
||||
|
||||
<UL>
|
||||
<LI><B>Coverage</B>. Even though not complete, the RGL has grown into a coverage that is close to complete enough; work
|
||||
with English shows that just about 20% more constructions can take us there.
|
||||
<P></P>
|
||||
<LI><B>Maintainability</B>. The RGL is constantly developed and maintained in its own right, and it makes sense to take
|
||||
advantage of this and avoid duplicated work with some other large-scale grammar.
|
||||
</UL>
|
||||
|
||||
<P>
|
||||
Of course, we are still left with the other
|
||||
option of addressing translation with an <I>application grammar</I>, something
|
||||
similar to the ResourceDemo with flatter and more semantic structures. But this would in turn require
|
||||
the replication of many rules, even though it would be to a large extent doable by using a <B>functor</B>, that is,
|
||||
by just one set of rules covering all languages.
|
||||
</P>
|
||||
|
||||
<H2>The structure of the wide-coverage translation grammar</H2>
|
||||
|
||||
<P>
|
||||
Thus the path chosen is a mixture of RGL and application grammar. In brief, the translation grammar consists of
|
||||
</P>
|
||||
|
||||
<UL>
|
||||
<LI><B>Selected RGL modules and functions</B>, as they are (using restricted inheritance); around 80% of the syntax.
|
||||
<P></P>
|
||||
<LI><B>Overridden RGL functions</B>, with more general types; just a few of them.
|
||||
<P></P>
|
||||
<LI><B>Overridden RGL linearizations</B>, typically with more <B>variants</B> in individual languages; just a few, but
|
||||
increasing.
|
||||
<P></P>
|
||||
<LI><B>Syntax extension</B>, new categories and functions, around 20% of the syntax, and increasing.
|
||||
<P></P>
|
||||
<LI><B>Big lexicon</B>, with an abstract syntax of 65k lemmas, increasing.
|
||||
<P></P>
|
||||
<LI><B>Constructions</B>, inspired by (and partly derived from) Construction Grammars, to capture idioms that
|
||||
involve specific lexical items and are therefore "between the syntax and the lexicon".
|
||||
</UL>
|
||||
|
||||
<P>
|
||||
The following picture shows the principal module structure of the translation grammar.
|
||||
</P>
|
||||
<P>
|
||||
<IMG ALIGN="middle" SRC="translation.png" BORDER="0" ALT="">
|
||||
</P>
|
||||
<P>
|
||||
Here is a description of each of the modules:
|
||||
</P>
|
||||
|
||||
<UL>
|
||||
<LI><B>Translate</B> is the top module, which combines the RGL syntax with syntax extensions and a dictionary.
|
||||
The RGL syntax is not inherited in its entirety, which is indicated by a dashed line. The overridden abstract
|
||||
syntax functions (common to all languages) are replaced by functions in the Extensions module, whereas the
|
||||
overridden concrete syntax definitions (specific to each language) are defined in this Translate module.
|
||||
This consists of the module named <CODE>Translate</CODE>.
|
||||
<P></P>
|
||||
<LI><B>RGLSyntax</B> stands for the standard RGL module for syntax, excluding the RGL test lexicon and
|
||||
the language-specific extensions of it. This consists of the standard module named <CODE>Grammar</CODE> and
|
||||
the emerging module named <CODE>Construction</CODE>.
|
||||
<P></P>
|
||||
<LI><B>Extensions</B> stands for the syntax extensions added to the RGL syntax. This consists of the module
|
||||
named <CODE>Extensions</CODE>.
|
||||
<P></P>
|
||||
<LI><B>Dictionary</B> is a large-scale multilingual dictionary. Its abstract syntax uses as identifiers English words
|
||||
suffixed by categories and word sense information. This consists of the module named <CODE>Dictionary</CODE>.
|
||||
<P></P>
|
||||
<LI><B>RGLCategories</B> stands for the type system of the standard RGL, the module named <CODE>Cat</CODE>.
|
||||
<P></P>
|
||||
<LI><B>Chunk</B> is the grammar defining what chunks (noun phrases, verbs,
|
||||
adverbs, etc) can be used and how they are combined, when exact
|
||||
syntactic combination fails.
|
||||
</UL>
|
||||
|
||||
<H2>Where and why the translation grammar differs from the RGL</H2>
|
||||
|
||||
<P>
|
||||
A guiding principle is thus that the translation grammar preserves <I>as much as possible</I> of the RGL, so that
|
||||
duplicated work is avoided. But as the purposes of the two are different, not everything is possible. Two
|
||||
diverging principles have already been mentioned:
|
||||
</P>
|
||||
|
||||
<UL>
|
||||
<LI><B>Free variation</B>. The RGL bans free variation, because library users need to have full control over selecting
|
||||
variants. For instance, English negation has two forms, contracted (<I>don't</I>) and uncontracted (<I>do not</I>),
|
||||
which in the translation grammar are treated as variants. But RGL users sometimes need to choose the one or the
|
||||
other, for instance, excluding contracted negation in formal style.
|
||||
<P></P>
|
||||
<LI><B>Semantic distinctions</B>. The RGL avoids semantic distinctions that are not absolutely necessary for syntax.
|
||||
The reason for this is the ambition to keep the library as simple as possible, in particular for the voluntary
|
||||
implementors of new languages. But meaning-preserving translation needs more distinctions, for instance, in
|
||||
word senses, subcategorizations, selection restrictions, and tense and aspect systems.
|
||||
</UL>
|
||||
|
||||
<P>
|
||||
The old design principles of the RGL are thus kept in force, and this is made possible by separating parts of the
|
||||
translation grammar modules from the RGL.
|
||||
</P>
|
||||
|
||||
<!-- html code generated by txt2tags 2.6 (http://txt2tags.org) -->
|
||||
<!-- cmdline: txt2tags -thtml translation.txt -->
|
||||
</BODY></HTML>
|
||||
Reference in New Issue
Block a user