diff --git a/.gitignore b/.gitignore
index da8b52bf1..0ee62cfb2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -39,8 +39,26 @@ src/runtime/c/sg/.dirstamp
src/runtime/c/stamp-h1
src/runtime/java/.libs/
src/runtime/python/build/
-src/ui/android/libs/
-src/ui/android/obj/
.cabal-sandbox
cabal.sandbox.config
+.stack-work
DATA_DIR
+
+stack*.yaml.lock
+
+# Generated documentation (not exhaustive)
+demos/index-numbers.html
+demos/resourcegrammars.html
+demos/translation.html
+doc/tutorial/gf-tutorial.html
+doc/index.html
+doc/gf-bibliography.html
+doc/gf-developers.html
+doc/gf-editor-modes.html
+doc/gf-people.html
+doc/gf-refman.html
+doc/gf-shell-reference.html
+doc/icfp-2012.html
+download/*.html
+gf-book/index.html
+src/www/gf-web-api.html
diff --git a/LICENSE b/LICENSE
index 451984bf4..ed35bf27f 100644
--- a/LICENSE
+++ b/LICENSE
@@ -8,24 +8,9 @@ other. For this reason the different components have different licenses.
In summary:
- the GF compiler in the folder src/compiler and the PGF Web service in src/server
-are under the GNU GENERAL PUBLIC LICENSE.
+are under the GNU GENERAL PUBLIC LICENSE.
- - the GF runtime in src/runtime is under dual GNU LESSER GENERAL PUBLIC LICENSE
-and BSD LICENSE
-
- - the resource grammar library in lib/src is under GNU LESSER GENERAL PUBLIC LICENSE.
-However the user have the right to choose any license for any application grammar
-derived from the resource grammar by using the grammar API.
-
- - the resource grammar library also includes large coverage lexicons for some languages.
-Since these lexicons are derived from external sources they might be under different licenses.
-Look at the source file for every lexicon for details. The lexicons that we currently have
-are:
-
- lib/src/bulgarian/ DictBul.gf DictBulAbs.gf for Bulgarian
- lib/src/english/ DictEng.gf DictEngAbs.gf for English
- lib/src/turkish/ DictTur.gf DictTurAbs.gf for Turkish
- lib/src/swedish/ DictSwe.gf DictSweAbs.gf for Swedish
+ - the GF runtime in src/runtime is under dual GNU LESSER GENERAL PUBLIC LICENSE and BSD LICENSE
The rest of this document contains copies of the GPL, LGPL and BSD licenses
which are applicable to the different components of Grammatical Framework
diff --git a/Makefile b/Makefile
index e5a1f2fb3..93a8dc20b 100644
--- a/Makefile
+++ b/Makefile
@@ -20,6 +20,7 @@ doc:
clean:
cabal clean
+ bash bin/clean_html
gf:
cabal build rgl-none
@@ -32,7 +33,7 @@ html::
# number to the top of debian/changelog.
# (Tested on Ubuntu 15.04. You need to install dpkg-dev & debhelper.)
deb:
- dpkg-buildpackage -b
+ dpkg-buildpackage -b -uc
# Make an OS X Installer package
pkg:
diff --git a/README.md b/README.md
index fe2a85ad0..5ee8967a6 100644
--- a/README.md
+++ b/README.md
@@ -37,13 +37,6 @@ The simplest way of installing GF is with the command:
cabal install
```
-This can be broken down into the usual sub-steps:
-```
-cabal configure
-cabal build
-cabal copy
-```
-
For more details, see the [download page](http://www.grammaticalframework.org/download/index.html)
and [developers manual](http://www.grammaticalframework.org/doc/gf-developers.html).
diff --git a/Setup.hs b/Setup.hs
index 505eb9813..27524dbe5 100644
--- a/Setup.hs
+++ b/Setup.hs
@@ -1,3 +1,4 @@
+import Distribution.System(Platform(..),OS(..))
import Distribution.Simple(defaultMainWithHooks,UserHooks(..),simpleUserHooks)
import Distribution.Simple.LocalBuildInfo(LocalBuildInfo(..),absoluteInstallDirs,datadir)
import Distribution.Simple.Setup(BuildFlags(..),Flag(..),InstallFlags(..),CopyDest(..),CopyFlags(..),SDistFlags(..))
@@ -73,5 +74,9 @@ dataDirFile = "DATA_DIR"
default_gf :: LocalBuildInfo -> FilePath
default_gf lbi = buildDir lbi > exeName' > exeNameReal
where
+ -- shadows Distribution.Simple.BuildPaths.exeExtension, which changed type signature in Cabal 2.4
+ exeExtension = case hostPlatform lbi of
+ Platform arch Windows -> "exe"
+ _ -> ""
exeName' = "gf"
exeNameReal = exeName' <.> exeExtension
diff --git a/WebSetup.hs b/WebSetup.hs
index fc70bb13e..fd55439b4 100644
--- a/WebSetup.hs
+++ b/WebSetup.hs
@@ -70,7 +70,7 @@ buildWeb gf flags (pkg,lbi) = do
gf_lib_path = datadir (absoluteInstallDirs pkg lbi dest) > "lib"
args = numJobs flags++["-make","-s"] -- ,"-optimize-pgf"
++["--gfo-dir="++tmp_dir,
- "--gf-lib-path="++gf_lib_path,
+ --"--gf-lib-path="++gf_lib_path,
"--name="++dropExtension pgf,
"--output-dir="++gfo_dir]
++[dir>file|file<-src]
@@ -104,9 +104,10 @@ setupWeb dest (pkg,lbi) = do
copy_pgf (pgf,subdir,_) =
do let src = gfo_dir > pgf
let dst = grammars_dir > pgf
- putStrLn $ "Installing "++dst
ex <- doesFileExist src
- if ex then copyFile src dst else return ()
+ if ex then do putStrLn $ "Installing "++dst
+ copyFile src dst
+ else putStrLn $ "Not installing "++dst
gf_logo = "gf0.png"
diff --git a/bin/build-binary-dist.sh b/bin/build-binary-dist.sh
index e44aaef0c..7a0e684bb 100755
--- a/bin/build-binary-dist.sh
+++ b/bin/build-binary-dist.sh
@@ -2,7 +2,7 @@
### This script builds a binary distribution of GF from the source
### package that this script is a part of. It assumes that you have installed
-### the Haskell Platform, version 2013.2.0.0 or 2012.4.0.0.
+### a recent version of the Haskell Platform.
### Two binary package formats are supported: plain tar files (.tar.gz) and
### OS X Installer packages (.pkg).
@@ -16,13 +16,14 @@ name="gf-$ver"
destdir="$PWD/dist/$name" # assemble binary dist here
prefix=${PREFIX:-/usr/local} # where to install
fmt=${FMT:-tar.gz} # binary package format (tar.gz or pkg)
+ghc=${GHC:-ghc} # which Haskell compiler to use
extralib="$destdir$prefix/lib"
extrainclude="$destdir$prefix/include"
extra="--extra-lib-dirs=$extralib --extra-include-dirs=$extrainclude"
set -e # Stop if an error occurs
-set -x # print commands before exuting them
+set -x # print commands before executing them
## First configure & build the C run-time system
pushd src/runtime/c
@@ -64,13 +65,30 @@ else
fi
## Build GF, with C run-time support enabled
-cabal install --only-dependencies -fserver -fc-runtime $extra
-cabal configure --prefix="$prefix" -fserver -fc-runtime $extra
+cabal install -w "$ghc" --only-dependencies -fserver -fc-runtime $extra
+cabal configure -w "$ghc" --prefix="$prefix" -fserver -fc-runtime $extra
DYLD_LIBRARY_PATH="$extralib" LD_LIBRARY_PATH="$extralib" cabal build
+ # Building the example grammars will fail, because the RGL is missing
+cabal copy --destdir="$destdir" # create www directory
+
+## Build the RGL and copy it to $destdir
+PATH=$PWD/dist/build/gf:$PATH
+export GF_LIB_PATH="$(dirname $(find "$destdir" -name www))/lib" # hmm
+mkdir -p "$GF_LIB_PATH"
+pushd ../gf-rgl
+make build
+make copy
+popd
+
+# Build GF again, including example grammars that need the RGL
+DYLD_LIBRARY_PATH="$extralib" LD_LIBRARY_PATH="$extralib" cabal build
+
+## Copy GF to $destdir
cabal copy --destdir="$destdir"
libdir=$(dirname $(find "$destdir" -name PGF.hi))
cabal register --gen-pkg-config=$libdir/gf-$ver.conf
+## Create the binary distribution package
case $fmt in
tar.gz)
targz="$name-bin-$hw-$os.tar.gz" # the final tar file
diff --git a/bin/clean_html b/bin/clean_html
new file mode 100755
index 000000000..8f0ec83d9
--- /dev/null
+++ b/bin/clean_html
@@ -0,0 +1,20 @@
+#!/bin/bash
+
+# This script finds all .t2t (txt2tags) and .md (Markdown) files
+# and deletes the corresponding HTML file of the same name.
+
+find . -name '*.t2t' | while read t2t ; do
+ html="${t2t%.t2t}.html"
+ if [ -f "$html" ] ; then
+ echo "$html"
+ rm -f "$html"
+ fi
+done
+
+find . -name '*.md' | while read md ; do
+ html="${md%.md}.html"
+ if [ -f "$html" ] ; then
+ echo "$html"
+ rm -f "$html"
+ fi
+done
diff --git a/bin/template.html b/bin/template.html
new file mode 100644
index 000000000..15306e1d9
--- /dev/null
+++ b/bin/template.html
@@ -0,0 +1,146 @@
+
+
+
+
+
+$for(include-after)$
+$include-after$
+$endfor$
+
+
+
+
diff --git a/bin/update_html b/bin/update_html
index 75f54b13e..912ff1fa0 100755
--- a/bin/update_html
+++ b/bin/update_html
@@ -1,11 +1,156 @@
#!/bin/bash
-### This script finds all .t2t (txt2tags) files and updates the corresponding
-### .html file, if it is out-of-date.
+# Generate HTML from txt2tags (.t2t) and Markdown (.md)
+# Usage:
+# - update_html
+# Look for all .t2t and .md files in the current directory and below,
+# generating the output HTML when the source is newer than the HTML.
+# - update_html path/to/file.t2t path/to/another.md
+# Generate HTML for the specified file(s), ignoring modification time.
+#
+# Requires:
+# - txt2tags for .t2t files. Tested with 2.6.
+# - pandoc for both .t2t and .md files. Tested with 1.16.0.2 and 2.3.1.
+# - the template file `template.html` in the same directory as this script.
+#
+# Tested with Ubuntu 16.04 and macOS Mojave.
+#
+# See also clean_html for removing the files generated by this script.
-find . -name '*.t2t' | while read t2t ; do
- html="${t2t%.t2t}.html"
- if [ "$t2t" -nt "$html" ] ; then
- txt2tags -thtml "$t2t"
+# Path to directory where this script is
+# https://stackoverflow.com/a/246128/98600
+DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )"
+
+# HTML template
+template="$DIR/template.html"
+
+# Render txt2tags into html file
+# Arguments:
+# 1. txt2tags source file, e.g. download/index.t2t
+# 2. html target file, e.g. download/index.html
+function render_t2t_html {
+ t2t="$1"
+ html="$2"
+ tmp="$2.tmp"
+ relroot="$( dirname $t2t | sed -E 's/^.\///' | sed -E 's/[^/]+/../g' )"
+
+ # First render with txt2tags to handle pre/post processing
+ txt2tags \
+ --target=html \
+ --no-headers \
+ --quiet \
+ --outfile="$tmp" \
+ --infile="$t2t"
+
+ # Replace with so that Pandoc retains it
+ # Do this for both cases since BSD sed doesn't support /i
+ sed -i.bak "s/<\/a>/
== Before you start ==
@@ -63,18 +55,6 @@ Other required tools included in the Haskell Platform are
and
[Happy http://www.haskell.org/happy/].
-%=== Darcs ===
-%
-%To get the GF source code, you also need //Darcs//, version 2 or later.
-%Darcs 2.10 is recommended (July 2015).
-%
-%//Darcs//
-%is a distributed version control system, see http://darcs.net/ for
-%more information. There are precompiled packages for many platforms
-%available and source code if you want to compile it yourself. Darcs
-%is also written in Haskell and so you can use GHC to compile it.
-
-
=== Git ===
To get the GF source code, you also need //Git//.
@@ -425,13 +405,13 @@ There is also ``make build``, ``make copy`` and ``make clean`` which do what you
=== Advanced ===
For advanced build options, call the Haskell build script directly:
```
-$ runghc Make.hs ...
+$ runghc Setup.hs ...
```
For more details see the [README https://github.com/GrammaticalFramework/gf-rgl/blob/master/README.md].
=== Haskell-free ===
-If you do not have Haskell installed, you can use the simple build script ``Make.sh``
-(or ``Make.bat`` for Windows).
+If you do not have Haskell installed, you can use the simple build script ``Setup.sh``
+(or ``Setup.bat`` for Windows).
== Creating binary distribution packages ==
diff --git a/doc/gf-editor-modes.t2t b/doc/gf-editor-modes.t2t
index 603025834..ffa6473ea 100644
--- a/doc/gf-editor-modes.t2t
+++ b/doc/gf-editor-modes.t2t
@@ -1,12 +1,6 @@
Editor modes & IDE integration for GF
-%!style:../css/style.css
-%!options(html): --toc
-%!postproc(html):
-%!encoding:utf-8
-%!postproc(html):
-
We collect GF modes for various editors on this page. Contributions are
welcome!
diff --git a/doc/gf-faq.t2t b/doc/gf-faq.t2t
deleted file mode 100644
index 016511cf0..000000000
--- a/doc/gf-faq.t2t
+++ /dev/null
@@ -1,91 +0,0 @@
-Grammatical Framework: Frequently Asked Quuestions
-Aarne Ranta
-%%date(%c)
-
-% NOTE: this is a txt2tags file.
-% Create an html file from this file using:
-% txt2tags gf-bibliography.t2t
-
-%!style:../css/style.css
-%!target:html
-%!options(html): --toc
-%!postproc(html):
-%!postproc(html): #BR
-%!encoding:utf-8
-%!postproc(html):
-
-
-===What has been done with GF?===
-
-**Translation**: systems with any number of parallel languages, with input in one language and output in all the others.
-
-**Natural language generation** (NLG): translation from a formal language to natural languages.
-
-**Ontology verbalization** is a special case of NLG.
-
-**Language training**: grammar and vocabulary training systems.
-
-**Human-computer interaction**: natural language interfaces, spoken dialogue systems.
-
-**Linguistics**: comparisons between languages.
-
-
-
-===What parts does GF have?===
-
-A **grammar compiler**, used for compiling grammars to parsing, generation, and translation code.
-
-A **run-time system**, used for parsing, generation and translation. The run-time system is available in several languages:
-Haskell, Java, C, C++, Javascript, and Python. The point with this is that you can include GF-based parsing and generation in
-larger programs written in any of these languages.
-
-A **resource grammar library**, containing the morphology and basic syntax of currently 26 languages.
-
-A **web application toolkit**, containing server-side (Haskell) and client-side (Javascript) libraries.
-
-An **integrated development environment**, the GF-Eclipse plug-in.
-
-A **shell**, i.e. a command interpreter for testing and developing GF grammars. This is the program started by the command ``gf`` in a terminal.
-
-
-
-===Is GF open-source?===
-
-
-===Can I use GF for commercial applications?===
-
-Yes. Those parts of GF that you will need to distribute - the run-time system and the libraries - are licensed under LGPL and BSD; it's up to you to choose which.
-
-
-
-===When was GF started?===
-
-
-===Where does the name GF come from?===
-
-GF = Grammatical Framework = LF + concrete syntax
-
-LF = Logical Framework
-
-Logical Frameworks are implementations of type theory, which have been built since the 1980's to support formalized mathematics. GF has its roots in
-type theory, which is widely used in the semantics of natural language. Some of these ideas were first implemented in ALF, Another Logical Framework,
-in 1992; the book //Type-Theoretical Grammar// (by A. Ranta, OUP 1994) has a chapter and an appendix on this. The first implementations did not have
-a parser, and GF proper, started in 1998, was an implementation of yet another LF together with concrete syntax supporting generation and parsing.
-Grammatical Framework was a natural name for this. We tried to avoid it in the beginning, because it sounded pretentious in its generality. But the
-name was just too natural to be avoided.
-
-
-
-===Is GF backward compatible?===
-
-
-
-===Do I need Haskell to use GF?===
-
-No. GF is a language of its own, and you don't need to know Haskell. And if you download the GF binary, you don't need any Haskell tools. But if you want to
-become a GF developer, then it's better you install GF from the latest source, and then you need the GHC Haskell compiler to compile GF. But even then, you
-don't need to know Haskell yourself.
-
-
-===What is a lock field?===
-
diff --git a/doc/gf-help-full.txt b/doc/gf-help-full.txt
index c10e60494..1b9c31705 100644
--- a/doc/gf-help-full.txt
+++ b/doc/gf-help-full.txt
@@ -68,9 +68,9 @@ metavariables and the type of the expression.
Prints a set of strings in the .dot format (the graphviz format).
The graph can be saved in a file by the wf command as usual.
If the -view flag is defined, the graph is saved in a temporary file
-which is processed by graphviz and displayed by the program indicated
-by the flag. The target format is postscript, unless overridden by the
-flag -format.
+which is processed by 'dot' (graphviz) and displayed by the program indicated
+by the view flag. The target format is png, unless overridden by the
+flag -format. Results from multiple trees are combined to pdf with convert (ImageMagick).
- Options:
@@ -151,6 +151,7 @@ of a pipe.
| ``-one`` | pick the first strings, if there is any, from records and tables
| ``-table`` | show all strings labelled by parameters
| ``-unqual`` | hide qualifying module names
+ | ``-trace`` | trace computations
#NORMAL
@@ -242,7 +243,7 @@ and thus cannot be a part of a pipe.
====e = empty====
#NOINDENT
-``e`` = ``empty``: //empty the environment.//
+``e`` = ``empty``: //empty the environment (except the command history).//
#TINY
@@ -281,6 +282,19 @@ but the resulting .gf file must be imported separately.
#NORMAL
+#VSPACE
+
+====eh = execute_history====
+#NOINDENT
+``eh`` = ``execute_history``: //read commands from a file and execute them.//
+
+#TINY
+
+- Syntax: ``eh FILE``
+
+#NORMAL
+
+
#VSPACE
====gr = generate_random====
@@ -434,12 +448,14 @@ sequences; see example.
| ``-list`` | show all forms and variants, comma-separated on one line (cf. l -all)
| ``-multi`` | linearize to all languages (default)
| ``-table`` | show all forms labelled by parameters
+ | ``-tabtreebank`` | show the tree and its linearizations on a tab-separated line
| ``-treebank`` | show the tree and tag linearizations with language names
| ``-bind`` | bind tokens separated by Prelude.BIND, i.e. &+
| ``-chars`` | lexer that makes every non-space character a token
| ``-from_amharic`` | from unicode to GF Amharic transliteration
| ``-from_ancientgreek`` | from unicode to GF ancient Greek transliteration
| ``-from_arabic`` | from unicode to GF Arabic transliteration
+ | ``-from_arabic_unvocalized`` | from unicode to GF unvocalized Arabic transliteration
| ``-from_cp1251`` | decode from cp1251 (Cyrillic used in Bulgarian resource)
| ``-from_devanagari`` | from unicode to GF Devanagari transliteration
| ``-from_greek`` | from unicode to GF modern Greek transliteration
@@ -453,11 +469,14 @@ sequences; see example.
| ``-from_urdu`` | from unicode to GF Urdu transliteration
| ``-from_utf8`` | decode from utf8 (default)
| ``-lexcode`` | code-like lexer
+ | ``-lexgreek`` | lexer normalizing ancient Greek accentuation
+ | ``-lexgreek2`` | lexer normalizing ancient Greek accentuation for text with vowel length annotations
| ``-lexmixed`` | mixture of text and code, as in LaTeX (code between $...$, \(...)\, \[...\])
| ``-lextext`` | text-like lexer
| ``-to_amharic`` | from GF Amharic transliteration to unicode
| ``-to_ancientgreek`` | from GF ancient Greek transliteration to unicode
| ``-to_arabic`` | from GF Arabic transliteration to unicode
+ | ``-to_arabic_unvocalized`` | from GF unvocalized Arabic transliteration to unicode
| ``-to_cp1251`` | encode to cp1251 (Cyrillic used in Bulgarian resource)
| ``-to_devanagari`` | from GF Devanagari transliteration to unicode
| ``-to_greek`` | from GF modern Greek transliteration to unicode
@@ -473,6 +492,7 @@ sequences; see example.
| ``-to_utf8`` | encode to utf8 (default)
| ``-unchars`` | unlexer that puts no spaces between tokens
| ``-unlexcode`` | code-like unlexer
+ | ``-unlexgreek`` | unlexer de-normalizing ancient Greek accentuation
| ``-unlexmixed`` | mixture of text and code (code between $...$, \(...)\, \[...\])
| ``-unlextext`` | text-like unlexer
| ``-unwords`` | unlexer that puts a single space between tokens (default)
@@ -513,6 +533,7 @@ trees where a function node is a metavariable.
| ``-from_amharic`` | from unicode to GF Amharic transliteration
| ``-from_ancientgreek`` | from unicode to GF ancient Greek transliteration
| ``-from_arabic`` | from unicode to GF Arabic transliteration
+ | ``-from_arabic_unvocalized`` | from unicode to GF unvocalized Arabic transliteration
| ``-from_cp1251`` | decode from cp1251 (Cyrillic used in Bulgarian resource)
| ``-from_devanagari`` | from unicode to GF Devanagari transliteration
| ``-from_greek`` | from unicode to GF modern Greek transliteration
@@ -526,11 +547,14 @@ trees where a function node is a metavariable.
| ``-from_urdu`` | from unicode to GF Urdu transliteration
| ``-from_utf8`` | decode from utf8 (default)
| ``-lexcode`` | code-like lexer
+ | ``-lexgreek`` | lexer normalizing ancient Greek accentuation
+ | ``-lexgreek2`` | lexer normalizing ancient Greek accentuation for text with vowel length annotations
| ``-lexmixed`` | mixture of text and code, as in LaTeX (code between $...$, \(...)\, \[...\])
| ``-lextext`` | text-like lexer
| ``-to_amharic`` | from GF Amharic transliteration to unicode
| ``-to_ancientgreek`` | from GF ancient Greek transliteration to unicode
| ``-to_arabic`` | from GF Arabic transliteration to unicode
+ | ``-to_arabic_unvocalized`` | from GF unvocalized Arabic transliteration to unicode
| ``-to_cp1251`` | encode to cp1251 (Cyrillic used in Bulgarian resource)
| ``-to_devanagari`` | from GF Devanagari transliteration to unicode
| ``-to_greek`` | from GF modern Greek transliteration to unicode
@@ -546,6 +570,7 @@ trees where a function node is a metavariable.
| ``-to_utf8`` | encode to utf8 (default)
| ``-unchars`` | unlexer that puts no spaces between tokens
| ``-unlexcode`` | code-like unlexer
+ | ``-unlexgreek`` | unlexer de-normalizing ancient Greek accentuation
| ``-unlexmixed`` | mixture of text and code (code between $...$, \(...)\, \[...\])
| ``-unlextext`` | text-like unlexer
| ``-unwords`` | unlexer that puts a single space between tokens (default)
@@ -666,10 +691,9 @@ command (flag -printer):
fa finite automaton in graphviz format
gsl Nuance speech recognition format
haskell Haskell (abstract syntax)
+ java Java (abstract syntax)
js JavaScript (whole grammar)
jsgf JSGF speech recognition format
- lambda_prolog LambdaProlog (abstract syntax)
- lp_byte_code Bytecode for Teyjus (abstract syntax, experimental)
pgf_pretty human-readable pgf
prolog Prolog (whole grammar)
python Python (whole grammar)
@@ -753,6 +777,7 @@ To see transliteration tables, use command ut.
| ``-from_amharic`` | from unicode to GF Amharic transliteration
| ``-from_ancientgreek`` | from unicode to GF ancient Greek transliteration
| ``-from_arabic`` | from unicode to GF Arabic transliteration
+ | ``-from_arabic_unvocalized`` | from unicode to GF unvocalized Arabic transliteration
| ``-from_cp1251`` | decode from cp1251 (Cyrillic used in Bulgarian resource)
| ``-from_devanagari`` | from unicode to GF Devanagari transliteration
| ``-from_greek`` | from unicode to GF modern Greek transliteration
@@ -766,11 +791,14 @@ To see transliteration tables, use command ut.
| ``-from_urdu`` | from unicode to GF Urdu transliteration
| ``-from_utf8`` | decode from utf8 (default)
| ``-lexcode`` | code-like lexer
+ | ``-lexgreek`` | lexer normalizing ancient Greek accentuation
+ | ``-lexgreek2`` | lexer normalizing ancient Greek accentuation for text with vowel length annotations
| ``-lexmixed`` | mixture of text and code, as in LaTeX (code between $...$, \(...)\, \[...\])
| ``-lextext`` | text-like lexer
| ``-to_amharic`` | from GF Amharic transliteration to unicode
| ``-to_ancientgreek`` | from GF ancient Greek transliteration to unicode
| ``-to_arabic`` | from GF Arabic transliteration to unicode
+ | ``-to_arabic_unvocalized`` | from GF unvocalized Arabic transliteration to unicode
| ``-to_cp1251`` | encode to cp1251 (Cyrillic used in Bulgarian resource)
| ``-to_devanagari`` | from GF Devanagari transliteration to unicode
| ``-to_greek`` | from GF modern Greek transliteration to unicode
@@ -786,6 +814,7 @@ To see transliteration tables, use command ut.
| ``-to_utf8`` | encode to utf8 (default)
| ``-unchars`` | unlexer that puts no spaces between tokens
| ``-unlexcode`` | code-like unlexer
+ | ``-unlexgreek`` | unlexer de-normalizing ancient Greek accentuation
| ``-unlexmixed`` | mixture of text and code (code between $...$, \(...)\, \[...\])
| ``-unlextext`` | text-like unlexer
| ``-unwords`` | unlexer that puts a single space between tokens (default)
@@ -799,13 +828,14 @@ To see transliteration tables, use command ut.
- Examples:
- | ``l (EAdd 3 4) | ps -code`` | linearize code-like output
- | ``ps -lexer=code | p -cat=Exp`` | parse code-like input
+ | ``l (EAdd 3 4) | ps -unlexcode`` | linearize code-like output
+ | ``ps -lexcode | p -cat=Exp`` | parse code-like input
| ``gr -cat=QCl | l | ps -bind`` | linearization output from LangFin
| ``ps -to_devanagari "A-p"`` | show Devanagari in UTF8 terminal
| ``rf -file=Hin.gf | ps -env=quotes -to_devanagari`` | convert translit to UTF8
| ``rf -file=Ara.gf | ps -from_utf8 -env=quotes -from_arabic`` | convert UTF8 to transliteration
| ``ps -to=chinese.trans "abc"`` | apply transliteration defined in file chinese.trans
+ | ``ps -lexgreek "a)gavoi` a)'nvrwpoi' tines*"`` | normalize ancient greek accentuation
#NORMAL
@@ -828,7 +858,6 @@ are type checking and semantic computation.
- Options:
| ``-compute`` | compute by using semantic definitions (def)
- | ``-paraphrase`` | paraphrase by using semantic definitions (def)
| ``-largest`` | sort trees from largest to smallest, in number of nodes
| ``-nub`` | remove duplicate trees
| ``-smallest`` | sort trees from smallest to largest, in number of nodes
@@ -838,12 +867,10 @@ are type checking and semantic computation.
- Flags:
| ``-number`` | take at most this many trees
- | ``-transfer`` | syntactic transfer by applying function, recursively in subtrees
- Examples:
| ``pt -compute (plus one two)`` | compute value
- | ``p "4 dogs love 5 cats" | pt -transfer=digits2numeral | l`` | four...five...
#NORMAL
@@ -990,8 +1017,6 @@ This command requires a source grammar to be in scope, imported with 'import -re
The operations include the parameter constructors that are in scope.
The optional TYPE filters according to the value type.
The grep STRINGs filter according to other substrings of the type signatures.
-This command must be a line of its own, and thus cannot be a part
-of a pipe.
- Syntax: ``so (-grep=STRING)* TYPE?``
- Options:
@@ -1002,6 +1027,12 @@ of a pipe.
| ``-grep`` | substring used for filtering (the command can have many of these)
+- Examples:
+
+ | ``so Det`` | show all opers that create a Det
+ | ``so -grep=Prep`` | find opers relating to Prep
+ | ``so | wf -file=/tmp/opers`` | write the list of opers to a file
+
#NORMAL
@@ -1113,6 +1144,7 @@ This command must be a line of its own, and thus cannot be a part of a pipe.
| ``-amharic`` | Amharic
| ``-ancientgreek`` | ancient Greek
| ``-arabic`` | Arabic
+ | ``-arabic_unvocalized`` | unvocalized Arabic
| ``-devanagari`` | Devanagari
| ``-greek`` | modern Greek
| ``-hebrew`` | unvocalized Hebrew
@@ -1137,35 +1169,41 @@ This command must be a line of its own, and thus cannot be a part of a pipe.
#TINY
Prints a dependency tree in the .dot format (the graphviz format, default)
+or LaTeX (flag -output=latex)
or the CoNLL/MaltParser format (flag -output=conll for training, malt_input
for unanalysed input).
By default, the last argument is the head of every abstract syntax
function; moreover, the head depends on the head of the function above.
The graph can be saved in a file by the wf command as usual.
If the -view flag is defined, the graph is saved in a temporary file
-which is processed by graphviz and displayed by the program indicated
-by the flag. The target format is png, unless overridden by the
-flag -format.
+which is processed by dot (graphviz) and displayed by the program indicated
+by the view flag. The target format is png, unless overridden by the
+flag -format. Results from multiple trees are combined to pdf with convert (ImageMagick).
+See also 'vp -showdep' for another visualization of dependencies.
- Options:
| ``-v`` | show extra information
+ | ``-conll2latex`` | convert conll to latex
- Flags:
- | ``-file`` | configuration file for labels per fun, format 'fun l1 ... label ... l2'
- | ``-format`` | format of the visualization file (default "png")
- | ``-output`` | output format of graph source (default "dot")
- | ``-view`` | program to open the resulting file (default "open")
+ | ``-abslabels`` | abstract configuration file for labels, format per line 'fun label*'
+ | ``-cnclabels`` | concrete configuration file for labels, format per line 'fun {words|*} pos label head'
+ | ``-file`` | same as abslabels (abstract configuration file)
+ | ``-format`` | format of the visualization file using dot (default "png")
+ | ``-output`` | output format of graph source (latex, conll, dot (default but deprecated))
+ | ``-view`` | program to open the resulting graph file (default "open")
| ``-lang`` | the language of analysis
- Examples:
| ``gr | vd`` | generate a tree and show dependency tree in .dot
- | ``gr | vd -view=open`` | generate a tree and display dependency tree on a Mac
- | ``gr -number=1000 | vd -file=dep.labels -output=malt`` | generate training treebank
- | ``gr -number=100 | vd -file=dep.labels -output=malt_input`` | generate test sentences
+ | ``gr | vd -view=open`` | generate a tree and display dependency tree with Mac's 'open'
+ | ``gr | vd -view=open -output=latex`` | generate a tree and display latex dependency tree with Mac's 'open'
+ | ``gr -number=1000 | vd -abslabels=Lang.labels -cnclabels=LangSwe.labels -output=conll`` | generate a random treebank
+ | ``rf -file=ex.conll | vd -conll2latex | wf -file=ex.tex`` | convert conll file to latex
#NORMAL
@@ -1182,15 +1220,16 @@ flag -format.
Prints a parse tree in the .dot format (the graphviz format).
The graph can be saved in a file by the wf command as usual.
If the -view flag is defined, the graph is saved in a temporary file
-which is processed by graphviz and displayed by the program indicated
-by the flag. The target format is png, unless overridden by the
-flag -format.
+which is processed by dot (graphviz) and displayed by the program indicated
+by the view flag. The target format is png, unless overridden by the
+flag -format. Results from multiple trees are combined to pdf with convert (ImageMagick).
- Options:
| ``-showcat`` | show categories in the tree nodes (default)
| ``-nocat`` | don't show categories
+ | ``-showdep`` | show dependency labels
| ``-showfun`` | show function names in the tree nodes
| ``-nofun`` | don't show function names (default)
| ``-showleaves`` | show the leaves of the tree (default)
@@ -1198,6 +1237,8 @@ flag -format.
- Flags:
+ | ``-lang`` | the language to visualize
+ | ``-file`` | configuration file for dependency labels with -deps, format per line 'fun label*'
| ``-format`` | format of the visualization file (default "png")
| ``-view`` | program to open the resulting file (default "open")
| ``-nodefont`` | font for tree nodes (default: Times -- graphviz standard font)
@@ -1210,7 +1251,8 @@ flag -format.
- Examples:
| ``p "John walks" | vp`` | generate a tree and show parse tree as .dot script
- | ``gr | vp -view="open"`` | generate a tree and display parse tree on a Mac
+ | ``gr | vp -view=open`` | generate a tree and display parse tree on a Mac
+ | ``p "she loves us" | vp -view=open -showdep -file=uddeps.labels -nocat`` | show a visual variant of a dependency tree
#NORMAL
@@ -1227,9 +1269,9 @@ flag -format.
Prints a set of trees in the .dot format (the graphviz format).
The graph can be saved in a file by the wf command as usual.
If the -view flag is defined, the graph is saved in a temporary file
-which is processed by graphviz and displayed by the program indicated
-by the flag. The target format is postscript, unless overridden by the
-flag -format.
+which is processed by dot (graphviz) and displayed by the command indicated
+by the view flag. The target format is postscript, unless overridden by the
+flag -format. Results from multiple trees are combined to pdf with convert (ImageMagick).
With option -mk, use for showing library style function names of form 'mkC'.
diff --git a/doc/gf-lrec-2010.pdf b/doc/gf-lrec-2010.pdf
deleted file mode 100644
index 83d1a58cf..000000000
Binary files a/doc/gf-lrec-2010.pdf and /dev/null differ
diff --git a/doc/gf-people.html b/doc/gf-people.html
deleted file mode 100644
index 37e430ae7..000000000
--- a/doc/gf-people.html
+++ /dev/null
@@ -1,132 +0,0 @@
-
-
-
-
-GF People
-
-
-
-
-
-
-
-
-
-
-
Grammatical Framework: Authors and Acknowledgements
-
-
-At least the following colleagues are thanked for suggestions,
-bug reports, and other indirect contributions to the code. (Notice:
-these are early contributors - the list has not been updated since 2004 or so).
-
-
-
-From 2001 to 2004, GF enjoyed funding from the
-Vinnova foundation, within the
-
-Interactive Languge Technology project.
-
-
-
-
-
diff --git a/doc/gf-people.md b/doc/gf-people.md
new file mode 100644
index 000000000..9e0177306
--- /dev/null
+++ b/doc/gf-people.md
@@ -0,0 +1,63 @@
+---
+title: "Grammatical Framework: Authors and Acknowledgements"
+---
+
+## Current maintainers
+
+The current maintainers of GF are
+
+[Krasimir Angelov](http://www.chalmers.se/cse/EN/organization/divisions/computing-science/people/angelov-krasimir),
+[Thomas Hallgren](http://www.cse.chalmers.se/~hallgren/),
+[Aarne Ranta](http://www.cse.chalmers.se/~aarne/),
+[John J. Camilleri](http://johnjcamilleri.com), and
+[Inari Listenmaa](https://inariksit.github.io/).
+
+This page is otherwise not up to date.
+For detailed data about contributors to the code repositories since 2007, see
+[here (gf-core)](https://github.com/GrammaticalFramework/gf-core/graphs/contributors)
+and
+[here (gf-rgl)](https://github.com/GrammaticalFramework/gf-rgl/graphs/contributors).
+
+## Previous contributors
+
+The following people have contributed code to some of the versions:
+
+- Grégoire Détrez (University of Gothenburg)
+- Ramona Enache (University of Gothenburg)
+- [Björn Bringert](http://www.cse.chalmers.se/alumni/bringert) (University of Gothenburg)
+- Håkan Burden (University of Gothenburg)
+- Hans-Joachim Daniels (Karlsruhe)
+- [Markus Forsberg](http://www.cs.chalmers.se/~markus) (Chalmers)
+- [Kristofer Johannisson](http://www.cs.chalmers.se/~krijo) (University of Gothenburg)
+- [Janna Khegai](http://www.cs.chalmers.se/~janna) (Chalmers)
+- [Peter Ljunglöf](http://www.cse.chalmers.se/~peb) (University of Gothenburg)
+- Petri Mäenpää (Nokia)
+
+At least the following colleagues are thanked for suggestions, bug
+reports, and other indirect contributions to the code.
+
+- [Stefano Berardi](http://www.di.unito.it/~stefano/) (Torino)
+- Pascal Boldini (Paris)
+- [Paul Callaghan](http://www.dur.ac.uk/~dcs0pcc/) (Durham)
+- Lauri Carlson (Helsinki)
+- [Koen Claessen](http://www.cse.chalmers.se/~koen) (Chalmers)
+- [Robin Cooper](http://www.cling.gu.se/~cooper) (Gothenburg)
+- [Thierry Coquand](http://www.cse.chalmers.se/~coquand) (Chalmers)
+- [Marc Dymetman](http://www.xrce.xerox.com/people/dymetman/dymetman.html) (XRCE)
+- Bertrand Grégoire (Tudor Institute, Luxembourg)
+- [Reiner Hähnle](http://www.cse.chalmers.se/~reiner) (Chalmers)
+- [Gérard Huet](http://pauillac.inria.fr/~huet/) (INRIA)
+- [Patrik Jansson](http://www.cse.chalmers.se/~patrikj) (Chalmers)
+- Bernard Jaulin (Paris)
+- [Lauri Karttunen](http://www.xrce.xerox.com/people/karttunen/karttunen.html) (PARC)
+- Matti Kinnunen (Nokia)
+- [Veronika Lux](http://www.xrce.xerox.com/people/lux/) (XRCE)
+- Per Martin-Löf (Stockholm)
+- [Bengt Nordström](http://www.cse.chalmers.se/~bengt) (Chalmers)
+- [Martin Okrslar](http://www.cis.uni-muenchen.de/studenten/stud_homepages/okrslar/reklame.html) (CIS)
+- Jianmin Pang (Durham)
+- [Sylvain Pogodalla](http://www.xrce.xerox.com/people/pogodalla/index.fr.html) (XRCE)
+- [Loïc Pottier](http://www.inria.fr/Loic.Pottier) (INRIA)
+- [Annie Zaenen](http://www2.parc.com/istl/members/zaenen/) (PARC)
+
+The GF logo was designed by Uula Ranta.
diff --git a/doc/gf-quickstart.html b/doc/gf-quickstart.html
deleted file mode 100644
index 787c23ce6..000000000
--- a/doc/gf-quickstart.html
+++ /dev/null
@@ -1,158 +0,0 @@
-
-
-
-GF Quickstart
-
-
-
-
-
-
-
-
-
-
-Aarne Ranta
-
-October 2011 for GF 3.3
-
-
-
-
Grammatical Framework Quick Start
-
-
-
-This Quick Start shows a few examples of how GF can be used.
-We assume that you have downloaded and installed GF, so that
-the command gf works for you. See download and install
-instructions here.
-
-
Copy the files
-Food.gf,
-FoodEng.gf, and
-FoodIta.gf.
-Or go to GF/examples/tutorial/food/, if you have downloaded the
-GF sources.
-
-
Start GF with the shell command (without the prompt $)
-
- $ gf FoodIta.gf FoodEng.gf
-
-Alternatively, start GF with gf and give the GF command import FoodIta.gf FoodEng.gf.
-
-
Translation. Try your first translation by giving the GF command
-
- parse "this cheese is very very Italian" | linearize
-
-Notice that the parser accept the tabulator for word completion.
-
-
Generation. Random-generate sentences in two languages:
-
- generate_random | linearize
-
-
-
Other commands. Use the help command
-
- help
-
-
More examples. Go to GF/examples/phrasebook or some other
-subdirectory of GF/examples/. Or try a resource grammar by, for instance,
-
- import alltenses/LangEng.gfo alltenses/LangGer.gfo
-
- parse -lang=Eng "I love you" | linearize -treebank
-
-The resource grammars are found relative to the value of GF_LIB_PATH, which
-you may have to set; see here for instructions.
-
-
-
-
-
-
-
-
Grammar development
-
-Add words to the Food
-grammars and try the above commands again. For instance, add the following lines:
-
- Bread : Kind ; -- in Food.gf
- Bread = {s = "bread"} ; -- in FoodEng.gf
- Bread = {s = "pane"} ; -- in FoodIta.gf
-
-and start GF again with the same command. Now you can even translate
-this bread is very Italian.
-
-To lear more on GF commands and
-grammar development, go to the one of the tutorials:
-
-
-GF has its own "machine language", PGF (Portable Grammar Format),
-which is recommended for use in applications at run time. To produce a PGF file from
-the two grammars above, do
-
- gf -make FoodIta.gf FoodEng.gf
- wrote Food.pgf
-
-You can use this in Haskell and Java programs, and also on web services, such as
-
-
-The quickest way to provide a GF web service is to start GF with the -server option:
-
- $ gf -server
- This is GF version 3.3
- Built on linux/i386 with ghc-7.0, flags: interrupt server cclazy
- Document root = /usr/local/share/gf-3.3/www
- Starting HTTP server, open http://localhost:41296/ in your web browser.
-
-You can view it locally by pointing your
-browser to the URL shown. You can add your own .pgf grammar to the service by
-copying it over to the documentRoot directory. Just push "reload" in
-your browser after each such update.
-
-
-
-To build more customized web application, consult the
-developer wiki.
-
-
-
User group
-
-You are welcome to join the User Group
-to get help and discuss GF-related issues!
-
-
-
-
-
-
diff --git a/doc/gf-reference.pdf b/doc/gf-reference.pdf
deleted file mode 100644
index 6eed355b8..000000000
Binary files a/doc/gf-reference.pdf and /dev/null differ
diff --git a/doc/gf-reference.t2t b/doc/gf-reference.t2t
deleted file mode 100644
index aab828f0a..000000000
--- a/doc/gf-reference.t2t
+++ /dev/null
@@ -1,493 +0,0 @@
-GF Quick Reference
-Aarne Ranta
-April 4, 2006
-
-% NOTE: this is a txt2tags file.
-% Create an html file from this file using:
-% txt2tags -thtml gf-reference.t2t
-
-%!style:../css/style.css
-%!target:html
-%!options: --toc
-%!postproc(html):
-%!postproc(html):
-
-This is a quick reference on GF grammars. It aims to
-cover all forms of expression available when writing
-grammars. It assumes basic knowledge of GF, which
-can be acquired from the
-[GF Tutorial http://www.grammaticalframework.org/doc/tutorial/gf-tutorial.html].
-Help on GF commands is obtained on line by the
-help command (``help``), and help on invoking
-GF with (``gf -help``).
-
-
-===A complete example===
-
-This is a complete example of a GF grammar divided
-into three modules in files. The grammar recognizes the
-phrases //one pizza// and //two pizzas//.
-
-File ``Order.gf``:
-```
-abstract Order = {
-cat
- Order ;
- Item ;
-fun
- One, Two : Item -> Order ;
- Pizza : Item ;
-}
-```
-File ``OrderEng.gf`` (the top file):
-```
---# -path=.:prelude
-concrete OrderEng of Order =
- open Res, Prelude in {
-flags startcat=Order ;
-lincat
- Order = SS ;
- Item = {s : Num => Str} ;
-lin
- One it = ss ("one" ++ it.s ! Sg) ;
- Two it = ss ("two" ++ it.s ! Pl) ;
- Pizza = regNoun "pizza" ;
-}
-```
-File ``Res.gf``:
-```
-resource Res = open Prelude in {
-param Num = Sg | Pl ;
-oper regNoun : Str -> {s : Num => Str} =
- \dog -> {s = table {
- Sg => dog ;
- _ => dog + "s"
- }
- } ;
-}
-```
-To use this example, do
-```
- % gf -- in shell: start GF
- > i OrderEng.gf -- in GF: import grammar
- > p "one pizza" -- parse string
- > l Two Pizza -- linearize tree
-```
-
-
-
-===Modules and files===
-
-One module per file.
-File named ``Foo.gf`` contains module named
-``Foo``.
-
-Each module has the structure
-```
-moduletypename =
- Inherits ** -- optional
- open Opens in -- optional
- { Judgements }
-```
-Inherits are names of modules of the same type.
-Inheritance can be restricted:
-```
- Mo[f,g], -- inherit only f,g from Mo
- Lo-[f,g] -- inheris all but f,g from Lo
-```
-Opens are possible in ``concrete`` and ``resource``.
-They are names of modules of these two types, possibly
-qualified:
-```
- (M = Mo), -- refer to f as M.f or Mo.f
- (Lo = Lo) -- refer to f as Lo.f
-```
-Module types and judgements in them:
-```
-abstract A -- cat, fun, def, data
-concrete C of A -- lincat, lin, lindef, printname
-resource R -- param, oper
-
-interface I -- like resource, but can have
- oper f : T without definition
-instance J of I -- like resource, defines opers
- that I leaves undefined
-incomplete -- functor: concrete that opens
- concrete CI of A = one or more interfaces
- open I in ...
-concrete CJ of A = -- completion: concrete that
- CI with instantiates a functor by
- (I = J) instances of open interfaces
-```
-The forms
-``param``, ``oper``
-may appear in ``concrete`` as well, but are then
-not inherited to extensions.
-
-All modules can moreover have ``flags`` and comments.
-Comments have the forms
-```
--- till the end of line
-{- any number of lines between -}
---# used for compiler pragmas
-```
-A ``concrete`` can be opened like a ``resource``.
-It is translated as follows:
-```
-cat C ---> oper C : Type =
-lincat C = T T ** {lock_C : {}}
-
-fun f : G -> C ---> oper f : A* -> C* = \g ->
-lin f = t t g ** {lock_C = <>}
-```
-An ``abstract`` can be opened like an ``interface``.
-Any ``concrete`` of it then works as an ``instance``.
-
-
-
-===Judgements===
-
-```
-cat C -- declare category C
-cat C (x:A)(y:B x) -- dependent category C
-cat C A B -- same as C (x : A)(y : B)
-fun f : T -- declare function f of type T
-def f = t -- define f as t
-def f p q = t -- define f by pattern matching
-data C = f | g -- set f,g as constructors of C
-data f : A -> C -- same as
- fun f : A -> C; data C=f
-
-lincat C = T -- define lin.type of cat C
-lin f = t -- define lin. of fun f
-lin f x y = t -- same as lin f = \x y -> t
-lindef C = \s -> t -- default lin. of cat C
-printname fun f = s -- printname shown in menus
-printname cat C = s -- printname shown in menus
-printname f = s -- same as printname fun f = s
-
-param P = C | D Q R -- define parameter type P
- with constructors
- C : P, D : Q -> R -> P
-oper h : T = t -- define oper h of type T
-oper h = t -- omit type, if inferrable
-
-flags p=v -- set value of flag p
-```
-Judgements are terminated by semicolons (``;``).
-Subsequent judgments of the same form may share the
-keyword:
-```
-cat C ; D ; -- same as cat C ; cat D ;
-```
-Judgements can also share RHS:
-```
-fun f,g : A -- same as fun f : A ; g : A
-```
-
-
-===Types===
-
-Abstract syntax (in ``fun``):
-```
-C -- basic type, if cat C
-C a b -- basic type for dep. category
-(x : A) -> B -- dep. functions from A to B
-(_ : A) -> B -- nondep. functions from A to B
-(p,q : A) -> B -- same as (p : A)-> (q : A) -> B
-A -> B -- same as (_ : A) -> B
-Int -- predefined integer type
-Float -- predefined float type
-String -- predefined string type
-```
-Concrete syntax (in ``lincat``):
-```
-Str -- token lists
-P -- parameter type, if param P
-P => B -- table type, if P param. type
-{s : Str ; p : P}-- record type
-{s,t : Str} -- same as {s : Str ; t : Str}
-{a : A} **{b : B}-- record type extension, same as
- {a : A ; b : B}
-A * B * C -- tuple type, same as
- {p1 : A ; p2 : B ; p3 : C}
-Ints n -- type of n first integers
-```
-Resource (in ``oper``): all those of concrete, plus
-```
-Tok -- tokens (subtype of Str)
-A -> B -- functions from A to B
-Int -- integers
-Strs -- list of prefixes (for pre)
-PType -- parameter type
-Type -- any type
-```
-As parameter types, one can use any finite type:
-``P`` defined in ``param P``,
-``Ints n``, and record types of parameter types.
-
-
-
-===Expressions===
-
-Syntax trees = full function applications
-```
-f a b -- : C if fun f : A -> B -> C
-1977 -- : Int
-3.14 -- : Float
-"foo" -- : String
-```
-Higher-Order Abstract syntax (HOAS): functions as arguments:
-```
-F a (\x -> c) -- : C if a : A, c : C (x : B),
- fun F : A -> (B -> C) -> C
-```
-Tokens and token lists
-```
-"hello" -- : Tok, singleton Str
-"hello" ++ "world" -- : Str
-["hello world"] -- : Str, same as "hello" ++ "world"
-"hello" + "world" -- : Tok, computes to "helloworld"
-[] -- : Str, empty list
-```
-Parameters
-```
-Sg -- atomic constructor
-VPres Sg P2 -- applied constructor
-{n = Sg ; p = P3} -- record of parameters
-```
-Tables
-```
-table { -- by full branches
- Sg => "mouse" ;
- Pl => "mice"
- }
-table { -- by pattern matching
- Pl => "mice" ;
- _ => "mouse" -- wildcard pattern
- }
-table {
- n => regn n "cat" -- variable pattern
- }
-table Num {...} -- table given with arg. type
-table ["ox"; "oxen"] -- table as course of values
-\\_ => "fish" -- same as table {_ => "fish"}
-\\p,q => t -- same as \\p => \\q => t
-
-t ! p -- select p from table t
-case e of {...} -- same as table {...} ! e
-```
-Records
-```
-{s = "Liz"; g = Fem} -- record in full form
-{s,t = "et"} -- same as {s = "et";t= "et"}
-{s = "Liz"} ** -- record extension: same as
- {g = Fem} {s = "Liz" ; g = Fem}
-
- -- tuple, same as {p1=a;p2=b;p3=c}
-```
-Functions
-```
-\x -> t -- lambda abstract
-\x,y -> t -- same as \x -> \y -> t
-\x,_ -> t -- binding not in t
-```
-Local definitions
-```
-let x : A = d in t -- let definition
-let x = d in t -- let defin, type inferred
-let x=d ; y=e in t -- same as
- let x=d in let y=e in t
-let {...} in t -- same as let ... in t
-
-t where {...} -- same as let ... in t
-```
-Free variation
-```
-variants {x ; y} -- both x and y possible
-variants {} -- nothing possible
-```
-Prefix-dependent choices
-```
-pre {"a" ; "an" / v} -- "an" before v, "a" otherw.
-strs {"a" ; "i" ;"o"}-- list of condition prefixes
-```
-Typed expression
-```
- -- same as t, to help type inference
-```
-Accessing bound variables in ``lin``: use fields ``$1, $2, $3,...``.
-Example:
-```
-fun F : (A : Set) -> (El A -> Prop) -> Prop ;
-lin F A B = {s = ["for all"] ++ A.s ++ B.$1 ++ B.s}
-```
-
-
-===Pattern matching===
-
-These patterns can be used in branches of ``table`` and
-``case`` expressions. Patterns are matched in the order in
-which they appear in the grammar.
-```
-C -- atomic param constructor
-C p q -- param constr. applied to patterns
-x -- variable, matches anything
-_ -- wildcard, matches anything
-"foo" -- string
-56 -- integer
-{s = p ; y = q} -- record, matches extensions too
-
-This document is a reference manual to the GF programming language.
-GF, Grammatical Framework, is a special-purpose programming language,
-designed to support definitions of grammars.
-
-
-This document is not an introduction to GF; such introduction can be
-found in the GF tutorial available on line on the GF web page,
-
-This manual covers only the language, not the GF compiler or
-interactive system. We will however make some references to different
-compiler versions, if they involve changes of behaviour having to
-do with the language specification.
-
-
-This manual is meant to be fully compatible with GF version 3.0.
-Main discrepancies with version 2.8 are indicated,
-as well as with the reference article on GF,
-
-
-A. Ranta, "Grammatical Framework. A Type Theoretical Grammar Formalism",
-The Journal of Functional Programming 14(2), 2004, pp. 145-189.
-
-
-This article will referred to as "the JFP article".
-
-
-As metalinguistic notation, we will use the symbols
-
-
-
a === b to say that a is syntactic sugar for b
-
a ==> b to say that a is computed (or compiled) to b
-
-
-
-
Overview of GF
-
-GF is a typed functional language,
-borrowing many of its constructs from ML and Haskell: algebraic datatypes,
-higher-order functions, pattern matching. The module system bears resemblance
-to ML (functors) but also to object-oriented languages (inheritance).
-The type theory used in the abstract syntax part of GF is inherited from
-logical frameworks, in particular ALF ("Another Logical Framework"; in a
-sense, GF is Yet Another ALF). From ALF comes also the use of dependent
-types, including the use of explicit type variables instead of
-Hindley-Milner polymorphism.
-
-
-The look and feel of GF is close to Java and
-C, due to the use of curly brackets and semicolons in structuring the code;
-the expression syntax, however, follows Haskell in using juxtaposition for
-function application and parentheses only for grouping.
-
-
-To understand the constructs of GF, and especially their limitations in comparison
-to general-purpose programming languages, it is essential to keep in mind that
-GF is a special-purpose and non-turing-complete language. Every GF program is
-ultimately compiled to a multilingual grammar, which consists of an
-abstract syntax and a set of concrete syntaxes. The abstract syntax
-defines a system of syntax trees, and each concrete syntax defines a
-mapping from those syntax trees to nested tuples of strings and integers.
-This mapping is compositional, i.e. homomorphic, and moreover
-reversible: given a nested tuple, there exists an effective way of finding
-the set of syntax trees that map to this tuple. The procedure of applying the
-mapping to a tree to produce a tuple is called linearization, and the
-reverse search procedure is called parsing. It is ultimately the requirement
-of reversibility that restricts GF to be less than turing-complete. This is
-reflected in restrictions to recursion in concrete syntax. Tree formation in
-abstract syntax, however, is fully recursive.
-
-
-Even though run-time GF grammars manipulate just nested tuples, at compile
-time these are represented by by the more fine-grained labelled records
-and finite functions over algebraic datatypes. This enables the programmer
-to write on a higher abstraction level, and also adds type distinctions
-and hence raises the level of checking of programs.
-
-
-
The module system
-
-
Top-level and supplementary module structure
-
-The big picture of GF as a programming language for multilingual grammars
-explains its principal module structure. Any GF grammar must have an
-abstract syntax module; it can in addition have any number of concrete
-syntax modules matching that abstract syntax. Before going to details,
-we give a simple example: a module defining the categoryA
-of adjectives and one adjective-forming function, the zero-place function
-Even. We give the module the name Adj. The GF code for the
-module looks as follows:
-
-
- abstract Adj = {
- cat A ;
- fun Even : A ;
- }
-
-
-Here are two concrete syntax modules, one intended for mapping the trees
-to English, the other to Swedish. The mappling is defined by
-lincat definitions assigning a linearization type to each category,
-and lin definitions assigning a linearization to each function.
-
-These examples illustrate the main ideas of multilingual grammars:
-
-
-
the concrete syntax must match the abstract syntax:
-
-
every cat is given a lincat
-
every fun is given a lin
-
-
-
-
-
the concrete syntax is internally coherent:
-
-
the lin rules respect the types defined by lincat rules
-
-
-
-
-
concrete syntaxes are independent of each other
-
-
they can use different lincat and lin definitions
-
they can define their own parameter types (param)
-
-
-
-
-The first two ideas form the core of the static checking of GF
-grammars, eliminating the possibility of run-time errors in
-linearization and parsing. The third idea gives GF the expressive
-power needed to map abstract syntax to vastly different languages.
-
-
-Abstract and concrete modules are called top-level grammar modules,
-since they are the ones that remain in grammar systems at run time.
-However, in order to support modular grammar engineering, GF provides
-much more module structure than strictly required in top-level grammars.
-
-
-Inheritance, also known as extension, means that a module can inherit the
-contents of one or more other modules to which new judgements are added,
-e.g.
-
-
- abstract MoreAdj = Adj ** {
- fun Odd : A ;
- }
-
-
-Resource modules define parameter types and operations usable
-in several concrete syntaxes,
-
-
- resource MorphoFre = {
- param Number = Sg | Pl ;
- param Gender = Masc | Fem ;
- oper regA : Str -> {s : Gender => Number => Str} =
- \fin -> {
- s = table {
- Masc => table {Sg => fin ; Pl => fin + "s"} ;
- Fem => table {Sg => fin + "e" ; Pl => fin + "es"}
- }
- } ;
- }
-
-
-By opening, a module can use the contents of a resource module
-without inheriting them, e.g.
-
-
- concrete AdjFre of Adj = open MorphoFre in {
- lincat A = {s : Gender => Number => Str} ;
- lin Even = regA "pair" ;
- }
-
-
-Interfaces and instances separate the contents of a resource module
-to type signatures and definitions, in a way analogous to abstract vs. concrete
-modules, e.g.
-
-Functors i.e. parametrized modules i.e. incomplete modules, defining
-a concrete syntax in terms of an interface.
-
-
- incomplete concrete AdjI of Adj = open Lexicon in {
- lincat A = Adjective ;
- lin Even = even_A ;
- }
-
-
-A functor can be instantiated by providing instances of its open interfaces.
-
-
- concrete AdjEng of Adj = AdjI with (Lexicon = LexiconEng) ;
-
-
-
-
Compilation units
-
-The compilation unit of GF source code is a file that contains a module.
-Judgements outside modules are supported only for backward compatibility,
-as explained here.
-Every source file, suffixed .gf, is compiled to a "GF object file",
-suffixed .gfo (as of GF Version 3.0 and later). For runtime grammar objects
-used for parsing and linearization, a set of .gfo files is linked to
-a single file suffixed .pgf. While .gf and .gfo files may contain
-modules of any kinds, a .pgf file always contains a multilingual grammar
-with one abstract and a set of concrete syntaxes.
-
-
-The following diagram summarizes the files involved in the compilation process.
-
-module1.gf module2.gf ... modulen.gf
-
-
-==>
-
-
-module1.gfo module2.gfo ... modulen.gfo
-
-
-==>
-
-
-grammar.pgf
-
-Both .gf and .gfo files are written in the GF source language;
-.pgf files are written in a lower-level format. The process of translating
-.gf to .gfo consists of name resolution, type annotation,
-partial evaluation, and optimization.
-There is a great advantage in the possibility to do this
-separately for GF modules and saving the result in .gfo files. The partial
-evaluation phase, in particular, is time and memory consuming, and GF libraries
-are therefore distributed in .gfo to make their use less arduous.
-
-
-In GF before version 3.0, the object files are in a format called .gfc,
-and the multilingual runtime grammar is in a format called .gfcm.
-
-
-The standard compiler has a built-in make facility, which finds out what
-other modules are needed when compiling an explicitly given module.
-This facility builds a dependency graph and decides which of the involved
-modules need recompilation (from .gf to .gfo), and for which the
-GF object can be used directly.
-
-
-
Names
-
-Each module M defines a set of names, which are visible in M
-itself, in all modules extending M (unless excluded, as explained
-here), and
-all modules opening M. These names can stand for abstract syntax
-categories and functions, parameter types and parameter constructors,
-and operations. All these names live in the same name space, which
-means that a name entering a module more than once due to inheritance or
-opening can lead to a conflict. It is specified
-here how these
-conflicts are resolved.
-
-
-The names of modules live in a name space separate from the other names.
-Even here, all names must be distinct in a set of files compiled to a
-multilingual grammar. In particular, even files residing in different directories
-must have different names, since GF has no notion of hierarchic
-module names.
-
-
-Lexically, names belong to the class of identifiers. An idenfifier is
-a letter followed by any number of letters, digits, undercores (_) and
-primes ('). Upper- and lower-case letters are treated as distinct.
-Nothing dictates the choice of upper or lower-case initials, but
-the standard libraries follow conventions similar to Haskell:
-
-
-
upper case is used for modules, abstract syntax categories and functions,
- parameter types and constructors, and type synonyms
-
lower case is used for non-type-valued operations and for variables
-
-
-
-
-
-
-"Letters" as mentioned in the identifier syntax include all 7-bit ASCII
-letters. Iso-latin-1 and Unicode letters are supported in varying degrees
-by different tools and platforms, and are hence not recommended in identifiers.
-
-
-
The structure of a module
-
-Modules of all types have the following structure:
-
-moduletypename=extendsopensbody
-
-The part of the module preceding the body is its header. The header
-defines the type of the module and tells what other modules it inherits
-and opens. The body consists of the judgements that introduce all the new
-names defined by the module.
-
-
-Any of the parts extends, opens, and body may be empty.
-If they are all filled, delimiters and keywords separate the parts in the
-following way:
-
-moduletypename=
- extends**openopensin{body}
-
-The part moduletypename looks slightly different if the
-type is concrete or instance: the name intrudes between
-the type keyword and the name of the module being implemented and which
-really belongs to the type of the module:
-
- concretenameofabstractname
-
-The only exception to the schema of functor syntax
-is functor instantiations: the instantiation
-list is given in a special way between extends and opens:
-
-Logically, the part "functornamewithinstantiations" should
-really be one of the extends. This is also shown by the fact that
-it can have restricted inheritance (concept defined here).
-
-
-
Module types, headers, and bodies
-
-The extends and opens parts of a module header are lists of
-module names (with possible qualifications, as defined below here).
-The first step of type checking a module consists of verifying that
-these names stand for modules of approptiate module types. As a rule
-of thumb,
-
-
-
the extends of a module must have the same moduletype
-
the opens of a module must be of type resource
-
-
-
-However, the precise rules are a little more fine-grained, because
-of the presence of interfaces and their instances, and the possibility
-to reuse abstract and concrete modules as resources. The following table
-gives, for all module types, the possible module types of their extends
-and opens, as well as the forms of judgement legal in that module type.
-
-
-
-
module type
-
extends
-
opens
-
body
-
-
-
abstract
-
abstract
-
-
-
cat, fun, def, data
-
-
-
concrete ofabstract
-
concrete
-
resource*
-
lincat, cat, oper, param
-
-
-
resource
-
resource*
-
resource*
-
oper, param
-
-
-
interface
-
resource+
-
resource*
-
oper, param
-
-
-
instance ofinterface
-
resource*
-
resource*
-
oper, param
-
-
-
incomplete concrete
-
concrete+
-
resource+
-
lincat, cat, oper, param
-
-
-
-
-
-The table uses the following shorthands for lists of module types:
-
-
-
resource*: resource, instance, concrete
-
resource+: resource*, interface, abstract
-
concrete+: concrete, incomplete concrete
-
-
-
-The legality of judgements in the body is checked before the judgements
-themselves are checked.
-
-Why are the legality conditions of opens and extends so complicated? The best way
-to grasp them is probably to consider a simplified logical model of the module
-system, replacing modules by types and functions. This model could actually
-be developed towards treating modules in GF as first-class objects; so far,
-however, this step has not been motivated by any practical needs.
-
-
-
-
module
-
object and type
-
-
-
abstract A = B
-
A = B : type
-
-
-
concrete C of A = B
-
C = B : A -> S
-
-
-
interface I = B
-
I = B : type
-
-
-
instance J of I = B
-
J = B : I
-
-
-
incomplete concrete C of A = open I in B
-
C = B : I -> A -> S
-
-
-
concrete K of A = C with (I=J)
-
K = B(J) : A -> S
-
-
-
resource R = B
-
R = B : I
-
-
-
concrete C of A = open R in B
-
C = B(R) : A -> S
-
-
-
-
-
-A further step of defining modules as first-class objects would use
-GADTs and record types:
-
-
-
an abstract syntax is a Generalized Algebraic Datatype (GADT)
-
the target type S of concrete syntax is the type of nested
- tuples over strings and integers
-
an interface is a labelled record type
-
an instance is a record of the type defined by the interface
-
a functor, with a module body opening an interface, is a function
- on its instances
-
the instantiation of a functor is an application of the function to
- some instance
-
a resource is a typed labelled record, putting together an interface and
- an instance of it
-
the body of a module opening a resource is as a function on the interface
- implicit in the resource; this function is immediately applied to the instance
- defined in the resource
-
-
-
-Slightly unexpectedly, interfaces and instances are easier to understand
-in this way than resources - a resource is, indeed, more complex, since
-it fuses together an interface and an instance.
-
-
-
-
-
-When an abstract is used as an interface and a concrete as its instance, they
-are actually reinterpreted so that they match the model. Then the abstract is
-no longer a GADT, but a system of abstract datatypes, with a record field
-of type Type for each category, and a function among these types for each
-abstract syntax function. A concrete syntax instantiates this record with
-linearization types and linearizations.
-
-
-
Inheritance
-
-After checking that the extends of a module are of appropriate
-module types, the compiler adds the inherited judgements to the
-judgements included in the body. The inherited judgements are
-not copied entirely, but their names with links to the inherited module.
-Conflicts may arise in this process: a name can have two definitions in the combined
-pool of inherited and added judgements. Such a conflict is always an
-error: GF provides no way to redefine an inherited constant.
-
-
-Simple as the definition of a conflict may sound, it has to take care of the
-inheritance hierarchy. A very common pattern of inheritance is the
-diamond: inheritance from two modules which themselves inherit a common
-base module. Assume that the base module defines a name f:
-
-
- N
- / \
- M1 M2
- \ /
- Base {f}
-
-
-Now, N inherits f from both M1 and M2, so is there a
-conflict? The answer in GF is no, because the "two" f's are in the
-end the same: the one defined in Base. The situation is thus simpler
-than in multiple inheritance in languages like C++, because definitions in
-GF are immutable: neither M1 nor M2 can possibly have changed
-the definition of f given in Base. In practice, the compiler manages
-inheritance through hierarchy in a very simple way, by just always creating
-a link not to the immediate parent, but the original ancestor; this ancestor
-can be read from the link provided by the immediate parent. Here is how
-links are created from source modules by the compiler:
-
-
- Base {f}
- M1 {m1} ===> M1 {Base.f, m1}
- M2 {m2} ===> M2 {Base.f, m2}
- N {n} ===> N {Base.f, M1.m1, M2.m2, n}
-
-
-
-
-
-
-Inheritance can be restricted. This means that a module can be specified
-as inheriting only explicitly listed constants, or all constants
-except ones explicitly listed. The syntax uses constant names in brackets,
-prefixed by a minus sign in the case of an exclusion list. In the following
-configuration, N inherits a,b,c from M1, and all names but d
-from M2
-
-
- N = M1 [a,b,c], M2-[d]
-
-
-Restrictions are performed as a part of inheritance linking, module by module:
-the link is created for a constant if and only if it is both
-included in the module and compatible with the restriction. Thus,
-for instance, an inadvertent usage can exclude a constant from one module
-but inherit it from another one. In the following
-configuration, f is inherited via M1, if M1 inherits it.
-
-
- N = M1 [a,b,c], M2-[f]
-
-
-Unintended inheritance may cause problems later in compilation, in the
-judgement-level dependency analysis phase. For instance, suppose a function
-f has category C as its type in M, and we only include f. The
-exclusion has the effect of creating an ill-formed module:
-
-
- abstract M = {cat C ; fun f : C ;}
- M [f] ===> {fun f : C ;}
-
-
-One might expect inheritance restriction to be transitive: if an included
-constant b depends on some other constant a, then a should be
-included automatically. However, this rule would lead to hard-to-detect
-inheritances. And it could only be applied later in the compilation phase,
-when the compiler has not only collected the names defined, but also
-resolved the names used in definitions.
-
-
-Yet another pitfall with restricted inheritance is that it must be stated
-for each module separately. For instance, a concrete syntax of an abstract
-must exclude all those names that the abstract does, and a functor instantiation
-must replicate all restrictions of the functor.
-
-
-
Opening
-
-Opening makes constants from other modules usable in judgements, without
-inheriting them. This means that, unlike inheritance, opening is not
-transitive.
-
-
-
-
-
-Opening cannot be restricted as inheritance can, but it can be qualified.
-This means that the names from the opened modules cannot be used as such, but
-only as prefixed by a qualifier and a dot (.). The qualifier can be any
-identifier, including the name of the module. Here is an example of
-an opens list:
-
-
- open A, (X = XSLTS), (Y = XSLTS), B
-
-
-If A defines the constant a, it can be accessed by the names
-
-
- a A.a
-
-
-If XSLTS defines the constant x, it can be accessed by the names
-
-
- X.x Y.x XSLTS.x
-
-
-Thus qualification by real module name is always possible, and one and the same
-module can be qualified in different ways at the same time (the latter can
-be useful if you want to be able to change the implementations of some
-constants to a different resource later). Since the qualification with real
-module name is always possible, it is not possible to "swap" the names of
-modules locally:
-
-
- open (A=B), (B=A) -- NOT POSSIBLE!
-
-
-The list of qualifier names and module names in a module header may
-thus not contain any duplicates.
-
-
-
Name resolution
-
-
-
-
-Name resolution is the compiler phase taking place after inheritance
-linking. It qualifies all names occurring in the definition parts of judgements
-(that is, just excluding the defined names themselves) with the names of
-the modules they come from. If a name can come from different modules (that is,
-not from their common ancestor), a conflict is reported; this decision is
-hence not dependent on e.g. types, which are known only at a later phase.
-
-
-Qualification of names is the main device for avoiding conflicts in
-name resolution. No other information is used, such as priorities between
-modules. However, if a name is defined in different opened modules
-but never used in the module body,
-a conflict does not arise: conflicts arise only
-when names are used. Also in this respect, opening is thus different from
-inheritance, where conflicts are checked independently of use.
-
-
-As usual, inner scope has priority in name resolution. This means that
-if an identifier is in scope as a bound variable, it will not be
-interpreted as a constant, unless qualified by a module name
-(variable bindings are explained here).
-
-
-
Functor instantiations
-
-We have dealt with the principles of module headers, inheritance, and
-names in a general way that applies to all module types. The exception
-is functor instantiations, that have an extra part of the instantiating
-equations, assigning an instance to every interface. Here is a typical
-example, displaying the full generality:
-
-
- concrete FoodsEng of Foods = PhrasesEng **
- FoodsI-[Pizza] with
- (Syntax = SyntaxEng),
- (LexFoods = LexFoodsEng) **
- open SyntaxEng, ParadigmsEng in {
- lin Pizza = mkCN (mkA "Italian") (mkN "pie") ;
- }
-
-
-(The example is modified from Section 5.9 in the GF Tutorial.)
-
-
-The instantiation syntax is similar to qualified opens. The left-hand-side
-names must be interfaces, the right-hand-side names their instances. (Recall
-that an abstract can be used as an interface and a concrete as its
-instance.) Inheritance from the functor can be restricted, typically
-in the purpose of defining some excluded functions in language-specific
-ways in the module body.
-
-
-
Completeness
-
-
-
-
-(This section refers to the forms of judgement introduced here.)
-
-
-A concrete is complete with respect to an abstract, if it
-contains a lincat definition for every cat declaration, and
-a lin definition for every fun declaration.
-
-
-The same completeness criterion applies to functor instantiations.
-It is not possible to use a partial functor instantiation, leading
-to another functor.
-
-
-Functors do not need to be complete in the sense concrete modules need.
-The missing definitions can then be provided in the body of each
-functor instantiation.
-
-
-A resource is complete, if all its oper and param judgements
-have a definition part. While a resource must be complete, an
-interface need not. For an interface, the definition
-parts of judgements are optional.
-
-
-An instance is complete with respect to an interface, if it
-gives the definition parts of all oper and param judgements
-that are omitted in the interface. Giving definitions to judgements
-that have already been defined in the interface is illegal.
-Type signatures, on the other hand, can be repeated if the same types
-are used.
-
-
-In addition to completing the definitions in an interface,
-its instance may contain other judgements, but these must all
-be complete with definitions.
-
-
-Here is an example of an instance and its interface showing the
-above variations:
-
-
- interface Pos = {
- param Case ; -- no definition
- param Number = Sg | Pl ; -- definition given
- oper Noun : Type = { -- relative definition given
- s : Number => Case => Str
- } ;
- oper regNoun : Str -> Noun ; -- no definition
- }
-
- instance PosEng of Pos = {
- param Case = Nom | Gen ; -- definition of Case
- -- Number and Noun inherited
- oper regNoun = \dog -> { -- type of regNoun inherited
- s = table { -- definition of regNoun
- Sg => table {
- Nom => dog
- -- etc
- }
- } ;
- oper house_N : Noun = -- new definition
- regNoun "house" ;
- }
-
-
-
-
Judgements
-
-
Overview of the forms of judgement
-
-
-
-
-A module body in GF is a set of judgements. Judgements are
-definitions or declarations, sometimes combinations of the two; the
-common feature is that every judgement introduces a name, which is
-available in the module and whenever the module is extended or opened.
-
-
-There are several different forms of judgement, identified by different
-judgement keywords. Here is a list of all these forms, together
-with syntax descriptions and the types of modules in which each form can occur.
-The table moreover indicates whether the judgement has a default value, and
-whether it contributes to the name base, i.e. introduces a new
-name to the scope.
-
-
-
-
judgement
-
where
-
module
-
default
-
base
-
-
-
cat C G
-
G context
-
abstract
-
N/A
-
yes
-
-
-
fun f : A
-
A type
-
abstract
-
N/A
-
yes
-
-
-
def f ps = t
-
f fun, ps patterns, t term
-
abstract
-
yes
-
no
-
-
-
data C = f | ... | g
-
C cat, f...g fun
-
abstract
-
yes
-
no
-
-
-
lincat C = T
-
C cat, T type
-
concrete*
-
yes
-
yes
-
-
-
lin f = t
-
f fun, t term
-
concrete*
-
no
-
yes
-
-
-
lindef C = t
-
C cat, t term
-
concrete*
-
yes
-
no
-
-
-
linref C = t
-
C cat, t term
-
concrete*
-
yes
-
no
-
-
-
printname cat C = t
-
C cat, t term
-
concrete*
-
yes
-
no
-
-
-
printname fun f = t
-
f fun, t term
-
concrete*
-
yes
-
no
-
-
-
param P = C| ... | D
-
C...D constructors
-
resource*
-
N/A
-
yes
-
-
-
oper f : T = t
-
T type, t term
-
resource*
-
N/A
-
yes
-
-
-
flags o = v
-
o flag, v value
-
all
-
yes
-
N/A
-
-
-
-
-
-Judgements that have default values are rarely used, except lincat and
-flags, which often need values different from the defaults.
-
-
-Introducing a name twice in the same module is an error. In other words,
-all judgements that have a "yes" in the name base column, must
-have distinct identifiers on their left-hand sides.
-
-
-All judgements end with semicolons (;).
-
-
-In addition to the syntax given in the table, many of the forms have
-syntactic sugar. This sugar will be explained below in connection to
-each form. There are moreover two kinds of syntactic sugar common to all forms:
-
-
-
the judgement keyword is shared between consecutive judgements
- until a new keyword appears:
-
-keyw J ; K ; === keyw J ; keyw K ;
-
-
the right-hand sides of colon (:) and equality (=)
- can be shared, by using comma (,) as separator of left-hand sides, which
- must consist of identifiers
-
-c,d : T === c : T ; d : T ;
-
-c,d = t === c = t ; d = t ;
-
-
-
-
-These conventions, like all syntactic sugar, are performed at an
-early compilation phase, directly after parsing. This means that e.g.
-
-
- lin f,g = \x -> x ;
-
-
-can be correct even though f and g require different
-function types.
-
-
-Within a module, judgements can occur in any order. In particular,
-a name can be used before it is introduced.
-
-
-The explanations of judgement forms refer to the notions
-of type and term (the latter also called expression).
-These notions will be explained in detail here.
-
-
-
Category declarations, cat
-
-
-
-
-Category declarations
-
-cat C G
-
-define the basic types of abstract syntax.
-A basic type is formed from a category by giving values to all variables
-in the context G. If the context is empty, the
-basic type looks the same as the category itself. Otherwise, application
-syntax is used:
-
-C a1 ... an
-
-
-
-
Hypotheses and contexts
-
-
-
-
-A context is a sequence of hypotheses, i.e. variable-type pairs.
-A hypothesis is written
-
-(x:T)
-
-and a sequence does not have any separator symbols. As syntactic sugar,
-
-
-
variables can share a type,
-
-(x,y:T) === (x:T)(y:T)
-
-
a wildcard can be used for a variable not occurring in types
- later in the context,
-
-(_:T) === (x:T)
-
-
if the variable does not occur later, it can be omitted altogether, and
- parentheses are not used,
-
- T === (x:T)
-
- But if T is more complex than an identifier, it needs parentheses to
- be separated from the rest of the context.
-
-
-
-An abstract syntax has dependent types, if any of its categories has
-a non-empty context.
-
-
-
Function declarations, fun
-
-Function declarations,
-
- fun f : T
-
-define the syntactic constructors of abstract
-syntax. The type T of f
-is built from basic types (formed from categories) by using
-the function type constructor ->. Thus its form is
-
- (x1:A1) -> ... -> (xn:An) -> B
-
-where Ai are types, called the argument types, and B is a
-basic type, called the value type of f. The value category of
-f is the category that forms the type B.
-
-
-A syntax tree is formed from f by applying it to a full list of
-arguments, so that the result is of a basic type.
-
-
-A higher-order function is one that has a function type as an
-argument. The concrete syntax of GF does not support displaying the
-bound variables of functions of higher than second order, but they are
-legal in abstract syntax.
-
-
-An abstract syntax is context-free, if it has neither dependent
-types nor higher-order functions. Grammars with context-free abstract
-syntax are an important subclass of GF, with more limited complexity
-than full GF. Whether the concrete syntax is context-free in the sense
-of the Chomsky hierarchy is independent of the context-freeness of
-the abstract syntax.
-
-
-
Function definitions, def
-
-Function definitions,
-
- def f p1 ... pn = t
-
-where f is a fun function and pi are patterns,
-impose a relation of definitional equality on abstract syntax
-trees. They form the basis of computation, which is used
-when comparing whether two types are equal; this notion is relevant
-only if the types are dependent. Computation can also be used for
-the normalization of syntax trees, which applies even in
-context-free abstract syntax.
-
-
-The set of def definitions for f can be scattered around
-the module in which f is introduced as a function. The compiler
-builds the set of pattern equations in the order in which the
-equations appear; this order is significant in the case of
-overlapping patterns. All equations must appear in the same module in
-which f itself is declared.
-
-
-The syntax of patterns will be specified here, commonly for
-abstract and concrete syntax. In abstract
-syntax, constructor patterns are those of the form
-
- C p1 ... pn
-
-where C is declared as data for some abstract syntax category
-(see next section). A variable pattern is either an identifier or
-a wildcard.
-
-
-A common pitfall is to forget to declare a constructor as data, which
-causes it to be interpreted as a variable pattern in definitions.
-
-
-Computation is performed by applying definitions and beta conversions,
-and in general by using pattern matching. Computation and pattern matching
-are explained commonly for abstract and concrete syntax here.
-
-
-In contrast to concrete syntax, abstract syntax computation is
-completely symbolic: it does not produce a value, but just another
-term. Hence it is not an error to have incomplete systems of
-pattern equations for a function. In addition, the definitions
-can be recursive, which means that computation can fail to terminate;
-this can never happen in concrete syntax.
-
-
-
Data constructor definitions, data
-
-A data constructor definition,
-
- data C = f1 | ... | fn
-
-defines the functions f1...fn to be constructors
-of the category C. This means that they are recognized as constructor
-patterns when used in function definitions.
-
-
-In order for the data constructor definition to be correct,
-f1...fn must be functions with C as their value category.
-
-
-The complete set of constructors for a category C is the union of
-all its data constructor definitions. Thus a category can be "extended"
-by new constructors afterwards. However, all these constructor definitions
-must appear in the same module in which the category is itself defined.
-
-
-There is syntactic sugar for declaring a function as a constructor at
-the same time as introducing it:
-
-data f : A1 -> ... -> An -> C t1 ... tm
-
-
- ===
-
-
-fun f : A1 -> ... -> An -> C t1 ... tm ;
- data C = f
-
-
-
-
The semantic status of an abstract syntax function
-
-There are three possible statuses for a function declared in a fun judgement:
-
-
-
primitive notion: the default status
-
constructor: the function appears on the right-hand side in data judgement
-
defined: the function has a def definition
-
-
-
-The "constructor" and "defined" statuses are in contradiction with each other,
-whereas the primitive notion status is overridden by any of the two others.
-
-
-This distinction is relevant for the semantics of abstract syntax, not
-for concrete syntax. It shows in the way patterns are treated in
-equations in def definitions: a constructor
-in a pattern matches only itself, whereas
-any other name is treated as a variable pattern, which matches
-anything.
-
-
-
Linearization type definitions, lincat
-
-A linearization type definition,
-
- lincat C = T
-
-defines the type of linearizations of trees whose type has category C.
-Type dependences have no effect on the linearization type.
-
-
-The type T must be a legal linearization type, which means that it
-is a record type whose fields have either parameter types, the type Str
-of strings, or table or record types of these. In particular, function types
-may not appear in T. A detailed explanation of types in concrete syntax
-will be given here.
-
-
-If K is the concrete syntax of an abstract syntax A, then K must
-define the linearization type of all categories declared in A. However,
-the definition can be omitted from the source code, in which case the default
-type {s : Str} is used.
-
-
-
Linearization definitions, lin
-
-A linearization definition,
-
- lin f = t
-
-defines the linearization function of f, i.e. the function
-used for linearizing trees formed by f.
-
-
-The type of t must be the homomorphic image of the type of f.
-In other words, if
-
- fun f : A1 -> ... -> An -> A
-
-then
-
- lin f : A1* -> ... -> An* -> A*
-
-where the type T* is defined as follows depending on T:
-
-
-
(C t1 ... tn)* = T, if lincat C = T
-
(B1 -> ... -> Bm -> B)* = B* ** {$0,...,$m : Str}
-
-
-
-The second case is relevant for higher-order functions only. It says that
-the linearization type of the value type is extended by adding a string field
-for each argument type; these fields store the variable symbol used for
-the binding of each variable.
-
-
-
-
-
-Since the arguments of a function argument are treated as bare strings,
-orders higher than the second are irrelevant for concrete syntax.
-
-
-There is syntactic sugar for binding the variables of the linearization
-of a function on the left-hand side:
-
- lin f p = t === lin f = \p -> t
-
-The pattern p must be either a variable or a wildcard (_); this is
-what the syntax of lambda abstracts (\p -> t) requires.
-
-
-
Linearization default definitions, lindef
-
-
-
-
-A linearization default definition,
-
- lindef C = t
-
-defines the default linearization of category C, i.e. the function
-applicable to a string to make it into an object of the linearization
-type of C.
-
-
-Linearization defaults are invoked when linearizing variable bindings
-in higher-order abstract syntax. A variable symbol is then presented
-as a string, which must be converted to correct type in order for
-the linearization not to fail with an error.
-
-
-The other use of the defaults is for linearizing metavariables
-and abstract functions without linearization in the concrete syntax.
-In the first case the default linearization is applied to
-the string "?X" where X is the unique index
-of the metavariable, and in the second case the string is
-"[f]" where f is the name of the abstract
-function with missing linearization.
-
-
-
-Usually, linearization defaults are generated by using the default
-rule that "uses the symbol itself for every string, and the
-first value of the parameter type for every parameter". The precise
-definition is by structural recursion on the type:
-
-
-
default(Str,s) = s
-
default(P,s) = #1(P)
-
default(P => T,s) = \\_ => default(T,s)
-
default({... ; r : R ; ...},s) = {... ; r : default(R,s) ; ...}
-
-
-
-The notion of the first value of a parameter type (#1(P)) is defined
-below.
-
-
-
Linearization reference definitions, linref
-
-
-
-
-A linearization reference definition,
-
- linref C = t
-
-defines the reference linearization of category C, i.e. the function
-applicable to an object of the linearization type of C to make it into a string.
-
-
-The reference linearization is always applied to the top-level node
-of the abstract syntax tree. For example when we linearize the
-tree f x1 x2 .. xn, then we first apply f
-to its arguments which gives us an object of the linearization type of
-its category. After that we apply the reference linearization
-for the same category to get a string out of the object. This
-is particularly useful when the linearization type of C
-contains discontinuous constituents. In this case usually the reference
-linearization glues the constituents together to produce an
-intuitive linearization string.
-
-
-The reference linearization is also used for linearizing metavariables
-which stand in function position. For example the tree
-f (? x1 x2 .. xn) is linearized as follows. Each
-of the arguments x1 x2 .. xn is linearized, and after that
-the reference linearization of its category is applied
-to the output of the linearization. The result is a sequence of n
-strings which are concatenated into a single string. The final string
-is the input to the default linearization of the category
-for the argument of f. After applying the default linearization
-we get an object that we could safely pass to f.
-
-
-Usually, linearization references are generated by using the
-rule that "picks the first string in the linearization type". The precise
-definition is by structural recursion on the type:
-
-Here each call to reference returns either (Just o) or Nothing.
-When we compute the reference for a table or a record then we pick
-the reference for the first expression for which the recursive call
-gives us Just. If we get Nothing for
-all of them then the final result is Nothing too.
-
-
-
Printname definitions, printname cat and printname fun
-
-A category printname definition,
-
- printname cat C = s
-
-defines the printname of category C, i.e. the name used
-in some abstract syntax information shown to the user.
-
-
-Likewise, a function printname definition,
-
- printname fun f = s
-
-defines the printname of function f, i.e. the name used
-in some abstract syntax information shown to the user.
-
-
-The most common use of printnames is in the interactive syntax
-editor, where printnames are displayed in menus. It is possible
-e.g. to adapt them to each language, or to embed HTML tooltips
-in them (as is used in some HTML-based editor GUIs).
-
-
-Usually, printnames are generated automatically from the symbol
-and/or concrete syntax information.
-
-
-
Parameter type definitions, param
-
-
-
-
-A parameter type definition,
-
- param P = C1 G1 | ... | Cn Gn
-
-defines a parameter type P with the parameter constructors
-C1...Cn, with their respective contexts G1...Gn.
-
-
-
-
-
-Contexts have the same syntax as in cat judgements, explained
-here. Since dependent types are not available in
-parameter type definitions, the use of variables is never
-necessary. The types in the context must themselves be parameter types,
-which are defined as follows:
-
-
-
Given the judgement param P ..., P is a parameter type.
-
A record type of parameter types is a parameter type.
-
Ints n (an initial segment of integers) is a parameter type.
-
-
-
-The names defined by a parameter type definition include both the
-type name P and the constructor names Ci. Therefore all these
-names must be distinct in a module.
-
-
-A parameter type may not be recursive, i.e. P itself may not occur in
-the contexts of its constructors. This restriction extends to mutual
-recursion: we say that P depends on the types that occur
-in the contexts of its constructors and on all types that those types
-depend on, and state that P may not depend on itself.
-
-
-In an interface module, it is possible to declare a parameter type
-without defining it,
-
- param P ;
-
-
-
-
Parameter values
-
-
-
-
-All parameter types are finite, and the GF compiler will internally
-compute them to lists of parameter values. These lists are formed by
-traversing the param definitions, usually respecting the
-order of constructors in the source code. For records, bibliographical
-sorting is applied. However, both the order of traversal of param
-definitions and the order of fields in a record are specified
-in a compiler-internal way, which means that the programmer should not
-rely on any particular order.
-
-
-The order of the list of parameter values can affect the program in two
-cases:
-
-
-
in the default lindef definition (here),
- the first value is chosen
-
in course-of-value tables (here), the compiler-internal order is
- followed
-
-
-
-The first usage implies that, if lindef definitions are essential for
-the application, they should be given manually. The second usage implies that
-course-of-value tables should be avoided in hand-written GF code.
-
-
-In run-time grammar generation, all parameter values are translated to
-integers denoting positions in these parameter lists.
-
-
-
Operation definitions, oper
-
-An operation definition,
-
- oper h : T = t
-
-defines an operation h of type T, with the computation rule
-
- h ==> t
-
-The type T can be any concrete syntax type, including function
-types of any order. The term t must have the type T, as
-defined here.
-
-
-As syntactic sugar, the type can be omitted,
-
- oper h = t
-
-which works in two cases
-
-
-
the type can be inferred from t (compiler-dependent)
-
the definition occurs in an instance and the type is given in
- the interface
-
-
-
-It is also possible to give the type and the definition separately:
-
-oper h : T ; oper h = t ===
- oper h : T = t
-
-The order of the type part and the definition part is free, and there
-can be other judgements in between. However, they must occur in the
-same resource module for it to be complete (as defined here).
-In an interface module, it is enough to give the type.
-
-
-When only the definition is given, it is possible to use a shorthand
-similar to lin judgements:
-
-oper h p = t === oper h = \p -> t
-
-The pattern p is either a variable or a wildcard (_).
-
-
-Operation definitions may not be recursive, not even mutually recursive.
-This condition ensures that functions can in the end be eliminated from
-concrete syntax code (as explained here).
-
-
-
Operation overloading
-
-
-
-
-One and the same operation name h can be used for different operations,
-which have to have different types. For each call of h, the type checker
-selects one of these operations depending on what type is expected in the
-context of the call. The syntax of overloaded operation definitions is
-
-Notice that h must be the same in all cases.
-This format can be used to give the complete implementation; to give just
-the types, e.g. in an interface, one can use the form
-
-oper h
- : overload {h : T1 ; ... ; h : Tn}
-
-The implementation of this operation typing is given by a judgement of
-the first form. The order of branches need not be the same.
-
-
-
Flag definitions, flags
-
-A flag definition,
-
- flags o = v
-
-sets the value of the flag o, to be used when compiling or using
-the module.
-
-
-The flag o is an identifier, and the value v is either an identifier
-or a quoted string.
-
-
-Flags are a kind of metadata, which do not strictly belong to the GF
-language. For instance, compilers do not necessarily check the
-consistency of flags, or the meaningfulness of their values.
-The inheritance of flags is not well-defined; the only certain rule
-is that flags set in the module body override the settings from
-inherited modules.
-
-
-Here are some flags commonly included in grammars.
-
-
-
-
flag
-
value
-
description
-
module
-
-
-
coding
-
character encoding
-
encoding used in string literals
-
concrete
-
-
-
startcat
-
category
-
default target of parsing
-
abstract
-
-
-
-
-
-The possible values of these flags are
-specified here. Note that
-the lexer and unlexer flags are
-deprecated. If you need their functionality, you should supply
-them to GF shell commands like so:
-
-
put_string -lextext "страви, напої" | parse
-
-A summary of their possible values can be found at the GF shell
- reference.
-
-
-
-
Types and expressions
-
-
Overview of expression forms
-
-
-
-
-Like many dependently typed languages, GF makes no syntactic distinction
-between expressions and types. An illegal use of a type as an expression or
-vice versa comes out as a type error. Whether a variable, for instance,
-stands for a type or an expression value, can only be resolved from its
-context of use.
-
-
-One practical consequence of the common syntax is that global and local definitions
-(oper judgements and let expressions, respectively) work in the same way
-for types and expressions. Thus it is possible to abbreviate a type
-occurring in a type expression:
-
-
- let A = {s : Str ; b : Bool} in A -> A -> A
-
-
-Type and other expressions have a system of precedences. The following table
-summarizes all expression forms, from the highest to the lowest precedence.
-Some expressions are moreover left- or right-associative.
-
-
-
-
prec
-
expression example
-
explanation
-
-
-
7
-
c
-
constant or variable
-
-
-
7
-
Type
-
the type of types
-
-
-
7
-
PType
-
the type of parameter types
-
-
-
7
-
Str
-
the type of strings/token lists
-
-
-
7
-
"foo"
-
string literal
-
-
-
7
-
123
-
integer literal
-
-
-
7
-
0.123
-
floating point literal
-
-
-
7
-
?
-
metavariable
-
-
-
7
-
[]
-
empty token list
-
-
-
7
-
[C a b]
-
list category
-
-
-
7
-
["foo bar"]
-
token list
-
-
-
7
-
{s : Str ; n : Num}
-
record type
-
-
-
7
-
{s = "foo" ; n = Sg}
-
record
-
-
-
7
-
<Sg,Fem,Gen>
-
tuple
-
-
-
7
-
<n : Num>
-
type-annotated expression
-
-
-
6 left
-
t.r
-
projection or qualification
-
-
-
5 left
-
f a
-
function application
-
-
-
5
-
table {Sg => [] ; _ => "xs"}
-
table
-
-
-
5
-
table P [a ; b ; c]
-
course-of-values table
-
-
-
5
-
case n of {Sg => [] ; _ => "xs"}
-
case expression
-
-
-
5
-
variants {"color" ; "colour"}
-
free variation
-
-
-
5
-
pre {vowel => "an" ; _ => "a"}
-
prefix-dependent choice
-
-
-
4 left
-
t ! v
-
table selection
-
-
-
4 left
-
A * B
-
tuple type
-
-
-
4 left
-
R ** {b : Bool}
-
record (type) extension
-
-
-
3 left
-
t + s
-
token gluing
-
-
-
2 left
-
t ++ s
-
token list concatenation
-
-
-
1 right
-
\x,y -> t
-
function abstraction ("lambda")
-
-
-
1 right
-
\\x,y => t
-
table abstraction
-
-
-
1 right
-
(x : A) -> B
-
dependent function type
-
-
-
1 right
-
A -> B
-
function type
-
-
-
1 right
-
P => T
-
table type
-
-
-
1 right
-
let x = v in t
-
local definition
-
-
-
1
-
t where {x = v}
-
local definition
-
-
-
1
-
in M.C "foo"
-
rule by example
-
-
-
-
-
-Any expression in parentheses ((exp)) is in the highest
-precedence class.
-
-
-
The functional fragment: expressions in abstract syntax
-
-
-
-
-The expression syntax is the same in abstract and concrete syntax, although
-only a part of the syntax is actually usable in well-typed expressions in
-abstract syntax. An abstract syntax is essentially used for defining a set
-of types and a set of functions between those types. Therefore it needs
-essentially the functional fragment
-of the syntax. This fragment comprises two kinds of types:
-
-
-
basic types, of form C a1...an where
-
-
cat C (x1 : A1)...(xn : An), including the predefined
- categories Int, Float, and String explained here
-
a1 : A1,...,an : An{x1 = a1,...,xn-1=an-1}
-
-
-
-
-
function types, of form (x : A) ->B, where
-
-
A is a type
-
B is a type possibly depending on x : A
-
-
-
-
-When defining basic types, we used the notation
-t{x1 = t1,...,xn=tn}
-for the substitution of values to variables. This is a metalevel notation,
-which denotes a term that is formed by replacing the free occurrences of
-each variable xi by ti.
-
-
-These types have six kinds of expressions:
-
-
-
constants, f : A where
-
-
fun f : A
-
-
-
-
-
literals for integers, floats, and strings (defined in here)
-
-
-
-
variables, x : A where
-
-
x has been introduced by a binding
-
-
-
-
-
applications, f a : B{x=a}, where
-
-
f : (x : A) ->B
-
a : A
-
-
-
-
-
abstractions, \x->b : (x : A) ->B, where
-
-
b : B possibly depending on x : A
-
-
-
-
-
metavariables, ?, as introduced in intermediate phases of
- incremental type checking; metavariables are not permitted
- in GF source code
-
-
-
-
-
-
-The notion of binding is defined for occurrences of variables in
-subexpressions as follows:
-
-
-
in (x : A) ->B, x is bound in B
-
in \x->b, x is bound in b
-
in def f p1 ... pn = t, any pattern variable introduced in
- any pi is bound in t (as defined here)
-
-
-
-As syntactic sugar, function types have sharing of types and
-suppression of variables, in the same way as contexts
-(defined here):
-
-
-
variables can share a type,
-
-(x,y:A)->B ===
- (x:A) -> (y:A) ->B
-
-
a wildcard can be used for a variable not occurring later in the type,
-
-(_:A) ->B ===
- (x:A) ->B
-
-
if the variable does not occur later, it can be omitted altogether, and
- parentheses are not used,
-
- A->B === (_:A) ->B
-
-
-
-
-There is analogous syntactic sugar for constant functions,
-
-\_->t === \x->t
-
-where x does not occur in t, and for multiple lambda abstractions:
-
-\p,q->t === \p->\q->t
-
-where p and q are variables or wild cards (_).
-
-
-
Conversions
-
-
-
-
-Among expressions, there is a relation of definitional equality defined
-by four conversion rules:
-
-
-
alpha conversion:
- \x->b = \y->b{x=y}
-
-
-
-
beta conversion: (\x->b) a = b{x=a}
-
-
-
-
delta conversion: f a1 ... an = t g, if
-
-
there is a definition def f p1 ... pn = t
-
this definition is the first for f that matches the sequence
- a1 ... an, with the substitution g
-
-
-
-
-
eta conversion: c = \x->c x,
- if c : (x : A) ->B
-
-
-
-Pattern matching substitution used in delta conversion
-is defined here.
-
-
-An expression is in beta-eta-normal form if
-
-
-
it has no subexpressions to which beta conversion applies (beta normality)
-
each constant or variable whose type is a function type must be
- eta-expanded, i.e. made into an abstraction equal to it by eta conversion
- (eta normality)
-
-
-
-Notice that the iteration of eta expansion would lead to an expression not
-in beta-normal form.
-
-
-
Syntax trees
-
-
-
-
-The syntax trees defined by an abstract syntax are well-typed
-expressions of basic types in beta-eta normal form.
-Linearization defined in concrete
-syntax applies to all and only these expressions.
-
-
-There is also a direct definition of syntax trees, which does not
-refer to beta and eta conversions: keeping in mind that a type always has
-the form
-
-(x1 : A1) -> ... -> (xn : An) ->B
-
-where Ai are types and B is a basic type, a syntax tree is an expression
-
-b t1 ... tn : B'
-
-where
-
-
-
B' is the basic type B{x1 = t1,...,xn = tn}
-
fun b : (x1 : A1) -> ... -> (xn : An) ->B
-
each ti has the form \z1,...,zm->c where Ai is
-
-(y1 : B1) -> ... -> (ym : Bm) ->B
-
-
-
-
-
Predefined types in abstract syntax
-
-
-
-
-GF provides three predefined categories for abstract syntax, with predefined
-expressions:
-
-
-
-
category
-
expressions
-
-
-
Int
-
integer literals, e.g. 123
-
-
-
Float
-
floating point literals, e.g. 12.34
-
-
-
String
-
string literals, e.g. "foo"
-
-
-
-
-
-These categories take no arguments, and they can be used as basic
-types in the same way as if they were introduced in cat judgements.
-However, it is not legal to define fun functions that have any
-of these types as value type: their only well-typed expressions are
-literals as defined in the above table.
-
-
-
Overview of expressions in concrete syntax
-
-
-
-
-Concrete syntax is about defining mappings from abstract syntax trees
-to concrete syntax objects. These objects comprise
-
-
-
records
-
tables
-
strings
-
parameter values
-
-
-
-Thus functions are not concrete syntax objects; however, the
-mappings themselves are expressed as functions, and the source code
-of a concrete syntax can use functions under the condition that
-they can be eliminated from the final compiled grammar (which they
-can; this is one of the fundamental properties of compilation, as
-explained in more detail in the JFP article).
-
-
-Concrete syntax thus has the same function types and expression forms as
-abstract syntax, specified here. The basic types defined
-by categories (cat judgements) are available via grammar reuse
-explained here; this also comprises the
-predefined categories Float and String.
-
-
-
Values, canonical forms, and run-time variables
-
-In abstract syntax, the conversion rules fiven here
-define a computational relation
-among expressions, but there is no separate notion of a value of
-computation: the value (the end point) of a computation chain is
-simply an expression to which no more conversions apply. In general,
-we are interested in expressions that satisfy the conditions of being
-syntax trees (as defined here), but there can be many computationally
-equivalent syntax trees which nonetheless are distinct syntax trees
-and hence have different linearizations. The main use of computation
-in abstract syntax is to compare types in dependent type checking.
-
-
-In concrete syntax, the notion of values is central. At run time,
-we want to compute the values of linearizations; at compile time, we want
-to perform partial evaluation, which computes expressions as far as
-possible.
-To specify what happens
-in computation we therefore have to distinguish between canonical forms
-and other forms of expressions. The canonical forms are defined separately
-for each form of type, whereas the other forms may usually produce expressions
-of any type.
-
-
-
-
-
-
-What is done at compile time is the elimination of any noncanonical forms,
-except for those depending on run-time variables. Run-time variables are
-the same as the argument variables of linearization rules, i.e. the
-variables x1,...,xn in
-
-lin f = \x1,...,xn->t
-
-where
-
-fun f :
-(x1 : A1) -> ... -> (xn : An) ->B
-
-Notice that this definition refers to the eta-expanded linearization term,
-which has one abstracted variable for each argument type of f. These variables
-are not necessarily explicit in GF source code, but introduced by the compiler.
-
-
-Since certain expression forms should be eliminated in compilation but
-cannot be eliminated if run-time variables appear in them, errors can
-appear late in compilation. This is an issue with the following
-expression forms:
-
predefined string operations, defined here (those taking
- Str arguments)
-
-
-
-
Token lists, tokens, and strings
-
-
-
-
-The most prominent basic type is Str, the type of token lists.
-This type is often sloppily referred to as the type of strings;
-but it should be kept in mind that the objects of Str are
-lists of strings rather than single strings.
-
-
-Expressions of type Str have the following canonical forms:
-
-
-
tokens, i.e. string literals, in double quotes, e.g. "foo"
-
the empty token list, []
-
concatenation, s++t, where s,t : Str
-
prefix-dependent choice,
- pre {p1 => s1 ; ... ; pn => sn ; _ => s }, where
-
-
s, s1,...,sn, p1,...,pn : Str
-
-
-
-
-For convenience, the notation is overloaded so that tokens are identified
-with singleton token lists, and there is no separate type of tokens
-(this is a change from the JFP article).
-The notion of a token
-is still important for compilation: all tokens introduced by
-the grammar must be known at compile time. This, in turn, is
-required by the parsing algorithms used for parsing with GF grammars.
-
-
-In addition to string literals, tokens can be formed by a specific
-non-canonical operator:
-
-
-
gluing, s+t, where s,t : Str
-
-
-
-
-
-
-Being noncanonical, gluing is equipped with a computation rule:
-string literals are glued by forming a new string literal, and
-empty token lists can be ignored:
-
-
-
"foo" + "bar" ==> "foobar"
-
t+ [] ==> t
-
[] +t ==> t
-
-
-
-Since tokens must be known at compile time,
-the operands of gluing may not depend on run-time variables,
-as defined here.
-
-
-As syntactic sugar, token lists can be given as bracketed string literals, where
-spaces separate tokens:
-
-Notice that there are no empty tokens, but the expression []
-can be used in a context requiring a token, in particular in gluing expression
-below. Since [] denotes an empty token list, the following computation laws
-are valid:
-
-
-
t++ [] ==> t
-
[] ++t ==> t
-
-
-
-Moreover, concatenation and gluing are associative:
-
-
-
s + (t + u) ==> s + t + u
-
s ++ (t ++ u) ==> s ++ t ++ u
-
-
-
-For the programmer, associativity and the empty token laws mean
-that the compiler can use them to simplify string expressions.
-It also means that these laws are respected in pattern matching
-on strings.
-
-
-A prime example of prefix-dependent choice operation is the following
-approximative expression for the English indefinite article:
-
-This expression can be computed in the context of a subsequent token:
-
-
-
pre {p1 => s1 ; ... ; pn => sn ; _ => s } ++ t
- ==>
-
-
si for the first i such that the prefix pi
- matches t, if it exists
-
s otherwise
-
-
-
-
-The matching prefix is defined by comparing the string with the prefix of
-the token. If the prefix is a variant list of strings, then it matches
-the token if any of the strings in the list matches it.
-
-
-The computation rule can sometimes be applied at compile time, but in general,
-prefix-dependent choices need to be passed to the run-time grammar, because
-they are not given a subsequent token to compare with, or because the
-subsequent token depends on a run-time variable.
-
-
-The prefix-dependent choice expression itself may not depend on run-time
-variables.
-
-
- There is an older syntax for prefix-dependent choice,
- namely: pre { s ; s1 / p1 ; ... ; sn / pn}. This syntax
- will not accept strings as patterns.
-
-
-In GF prior to 3.0, a specific type Strs
-is used for defining prefixes,
-instead of just variants of Str.
-
-
-
Records and record types
-
-A record is a collection of objects of possibly different types,
-accessible by projections from the record with labels pointing
-to these objects. A record is also itself an object, whose type is
-a record type. Record types have the form
-
- {r1 : A1; ... ;rn : An}
-
-where n >= 0, each Ai is a type, and the labels ri are
-distinct. A record of this type has the form
-
- {r1 = a1; ... ;rn = an}
-
-where each ai : Ai. A limiting case is the empty record type
-{}, which has the object {}, the empty record.
-
-
-The fields of a record type are its parts of the form r : A,
-also called typings. The fields of a record are of the form
-r = a, also called value assignments. Value assignments
-may optionally indicate the type, as in r : A = a.
-
-
-The order of fields in record types and records is insignificant: two record
-types (or records) are equal if they have the same fields, in any order, and a
-record is an object of a record type, if it has type-correct value assignments
-for all fields of the record type.
-The latter definition implies the even stronger
-principle of record subtyping: a record can have any type that has some
-subset of its fields. This principle is explained further
-here.
-
-
-All fields in a record must have distinct labels. Thus it is not possible
-e.g. to "redefine" a field "later" in a record.
-
-
-Lexically, labels are identifiers (defined here).
-This is with the exception
-of the labels selecting bound variables in the linearization of higher-order
-abstract syntax, which have the form $i for an integer i,
-as specified here.
-In source code, these labels should not appear in records fields,
-but only in selections.
-
-
-Labels occur only in syntactic positions where they cannot be confused with
-constants or variables. Therefore it is safe to write, as in Prelude,
-
-
- ss : Str -> {s : Str} = \s -> {s = s} ;
-
-
-A projection is an expression of the form
-
- t.r
-
-where t must be a record and r must be a label defined in it.
-The type of the projection is the type of that field.
-The computation rule for projection returns the value assigned to that field:
-
-{ ... ;r = a; ... }.r ==> a
-
-Notice that the dot notation t.r is also used for qualified names
-as specified here.
-This ambiguity follows tradition and convenience. It is
-resolved by the following rules (before type checking):
-
-
-
if t is a bound variable or a constant in scope,
- t.r is type-checked as a projection
-
otherwise, t.r is type-checked as a qualified name
-
-
-
-As syntactic sugar, types and values can be shared:
-
-Another syntactic sugar are tuple types and tuples, which are translated
-by endowing their unlabelled fields by the labels p1, p2,... in the
-order of appearance of the fields:
-
-
-
A1* ... *An ===
- {p1 : A1; ... ;pn : An}
-
<a1, ... ,an> ===
- {p1 = a1; ... ;pn = an}
-
-
-
-A record extension is formed by adding fields to a record or a record type.
-The general syntax involves two expressions,
-
- R**S
-
-The result is a record type or a record with a union of the fields of R and
-S. It is therefore well-formed if
-
-
-
both R and S are either records or record types
-
the labels in R and S are disjoint, if R and S are record types
-
-(Since GF version 3.6) If R and S are record objects,
-then the labels in them need not be disjoint. Labels defined in
-S are then given priority, so that record extensions in fact
-works as record update. A common pattern of using this feature
-is
-
- lin F x ... = x ** {r = ... x.r ...}
-
-where x is a record with many fields, just one of which is
-updated. Following the normal binding conditions, x.r on the
-right hand side still refers to the old value of the r field.
-
-
-
-
Subtyping
-
-
-
-
-The possibility of having superfluous fields in a record forms the basis of
-the subtyping relation.
-That A is a subtype of B means that a : A implies a : B.
-This is clearly satisfied for records with superfluous fields:
-
-
-
if R is a record type without the label r,
- then R** {r : A} is a subtype of R
-
-
-
-The GF grammar compiler extends subtyping to function types by covariance
-and contravariance:
-
-
-
covariance: if A is a subtype of B,
- then C->A is a subtype of C->B
-
contravariance: if A is a subtype of B,
- then B->C is a subtype of A->C
-
-
-
-The logic of these rules is natural: if a function returns a value
-in a subtype, then this value is a fortiori in the supertype.
-If a function is defined for some type, then it is a fortiori defined
-for any subtype.
-
-
-In addition to the well-known principles of record subtyping and co- and
-contravariance, GF implements subtyping for initial segments of integers:
-
-
-
if m < n, then Intsm is a subtype of Intsn
-
Intsn is a subtype of Integer
-
-
-
-As the last rule, subtyping is transitive:
-
-
-
if A is a subtype of B and B is a subtype of C, then
- A is a subtype of C.
-
-
-
-
Tables and table types
-
-
-
-
-One of the most characteristic constructs of GF is tables, also called
-finite functions. That these functions are finite means that it
-is possible to finitely enumerate all argument-value pairs; this, in
-turn, is possible because the argument types are finite.
-
-
-A table type has the form
-
-P=>T
-
-where P must be a parameter type in the sense defined here, whereas
-T can be any type.
-
-
-Canonical expressions of table types are tables, of the form
-
-table{V1=>t1 ; ... ; Vn=>tn}
-
-where V1,...,Vn is the complete list of the parameter values of
-the argument type P (defined here), and each ti is
-an expression of the value type T.
-
-
-In addition to explicit enumerations,
-tables can be given by pattern matching,
-
-table{p1=>t1 ; ... ; pm=>tm}
-
-where p1,....,pm is a list of patterns that covers all values of type P.
-Each pattern pi may bind some variables, on which the expression ti
-may depend. A complete account of patterns and pattern matching is given
-here.
-
-
-A course-of-values table omits the patterns and just lists all
-values. It uses the enumeration of all values of the argument type P
-to pair the values with arguments:
-
-table P [t1 ; ... ; tn]
-
-This format is not recommended for GF source code, since the
-ordering of parameter values is not specified and therefore a
-compiler-internal decision.
-
-
-The argument type can be indicated in ordinary tables as well, which is
-sometimes helpful for type inference:
-
-table P { ... }
-
-
-
-The selection operator !, applied to a table t and to an expression
-v of its argument type
-
-t!v
-
-returns the first pattern matching result from t with v, as defined
-here. The order of patterns is thus significant as long as the
-patterns contain variables or wildcards. When the compiler reorders the
-patterns following the enumeration of all values of the argument type,
-this order no longer matters, because no overlap remains between patterns.
-
-
-The GF compiler performs table expansion, i.e. an analogue of
-eta expansion defined here, where a table is applied to all
-values of its argument type:
-
-t : P=>T ==>
-table P [t!V1 ; ... ; t!Vn]
-
-As syntactic sugar, one-branch tables can be written in a way similar to
-lambda abstractions:
-
-\\p=>t === table {p=>t}
-
-where p is either a variable or a wildcard (_). Multiple bindings
-can be abbreviated:
-
-\\p,q=>t === \\p=>\\q=>t
-
-Case expressions are syntactic sugar for selections:
-
-case e of {...} === table {...} !e
-
-
-
-
Pattern matching
-
-
-
-
-We will list all forms of patterns that can be used in table branches.
-We define their variable bindings and matching substitutions.
-
-
-We start with the patterns available for all parameter types, as well
-as for the types Integer and Str.
-
-
-
A constructor pattern Cp1...pn
- binds the union of all variables bound in the subpatterns
- p1,...,pn.
- It matches any value
- CV1...Vn where each pi# matches Vi,
- and the matching substitution is the union of these substitutions.
-
A record pattern
- {r1=p1; ... ;rn=pn}
- binds the union of all variables bound in the subpatterns
- p1,...,pn.
- It matches any value
- {r1=V1; ... ;rn=Vn; ...}
- where each pi# matches Vi,
- and the matching substitution is the union of these substitutions.
-
A variable pattern x
- (identifier other than parameter constructor)
- binds the variable x.
- It matches any value V, with the substitution {x = V}.
-
The wild card _ binds no variables.
- It matches any value, with the empty substitution.
-
A disjunctive pattern p|q binds the intersection of
- the variables bound by p and q.
- It matches anything that
- either p or q matches, with the first substitution starting
- with p matches, from which those
- variables that are not bound by both patterns are removed.
-
A negative pattern -p binds no variables.
- It matches anything that p does not match, with the empty
- substitution.
-
An alias pattern x@p binds x and all the variables
- bound by p. It matches any value V that p matches, with
- the same substition extended by {x = V}.
-
-
-
-The following patterns are only available for the type Str:
-
-
-
A string literal pattern, e.g. "s", binds no variables.
- It matches the same string, with the empty substitution.
-
A concatenation pattern, p+q,
- binds the union of variables bound by p and q.
- It matches any string that consists
- of a prefix matching p and a suffix matching q,
- with the union of substitutions corresponding to the first match (see below).
-
A repetition pattern p* binds no variables.
- It matches any string that can be decomposed
- into strings that match p, with the empty substitution.
-
-
-
-The following pattern is only available for the types Integer
-and Intsn:
-
-
-
An integer literal pattern, e.g. 214, binds no variables.
- It matches the same integer, with
- the empty substitution.
-
-
-
-All patterns must be linear: the same pattern variable may occur
-only once in them. This is what makes it straightforward to speak
-about unions of binding sets and substitutions.
-
-
-Pattern matching is performed in the order in which the branches
-appear in the source code: the branch of the first matching pattern is followed.
-In concrete syntax, the type checker rejects sets of patterns that are
-not exhaustive, and warns for completely overshadowed patterns.
-It also checks the type correctness of patterns with respect to the
-argument type. In abstract syntax, only type correctness is checked,
-no exhaustiveness or overshadowing.
-
-
-It follows from the definition of record pattern matching
-that it can utilize partial records: the branch
-
-
- {g = Fem} => t
-
-
-in a table of type {g : Gender ; n : Number} => T means the same as
-
-
- {g = Fem ; n = _} => t
-
-
-Variables in regular expression patterns
-are always bound to the first match, which is the first
-in the sequence of binding lists. For example:
-
-
-
x + "e" + y matches "peter" with x = "p", y = "ter"
-
x + "er"* matches "burgerer" with x = "burg"
-
-
-
-
Free variation
-
-An expressions of the form
-
-variants{t1 ; ... ; tn}
-
-where all ti are of the same type T, has itself type T.
-This expression presents t1,...,tn as being in free variation:
-the choice between them is not determined by semantics or parameters.
-A limiting case is
-
-variants {}
-
-which encodes a rule saying that there is no way to express a certain
-thing, e.g. that a certain inflectional form does not exist.
-
-
-A common wisdom in linguistics is that "there is no free variation", which
-refers to the situation where all aspects are taken into account. For
-instance, the English negation contraction could be expressed as free variation,
-
-
- variants {"don't" ; "do" ++ "not"}
-
-
-if only semantics is taken into account, but if stylistic aspects are included,
-then the proper formulation might be with a parameter distinguishing between
-informal and formal style:
-
-
- case style of {Informal => "don't" ; Formal => "do" ++ "not"}
-
-
-Since there is no way to choose a particular element from a variants list,
-free variation is normally not adequate in libraries, nor in grammars meant for
-natural language generation. In application grammars
-meant to parse user input, free variation is a way to avoid cluttering the
-abstract syntax with semantically insignificant distinctions and even to
-tolerate some grammatical errors.
-
-
-Permitting variants in all types involves a major modification of the
-semantics of GF expressions. All computation rules have to be lifted to
-deal with lists of expressions and values. For instance,
-
-A local definition, i.e. a let expression has the form
-
-let x : T = t in e
-
-The type of x must be T, which also has to be the type of t.
-Computation is performed by substituting t for x in e:
-
-let x : T = t in e ==> e {x = t}
-
-As syntactic sugar, the type can be omitted if the type checker is
-able to infer it:
-
-let x = t in e
-
-It is possible to compress several local definitions into one block:
-
-let x : T = t ; y : U = u in e
-===
-let x : T = t in let y : U = u in e
-
-Another notational variant is a definition block appearing after the main
-expression:
-
-e where {...} === let {...} in e
-
-Curly brackets are obligatory in the where form, and can
-also be optionally used in the let form.
-
-
-Since a block of definitions is treated as syntactic sugar
-for a nested let expression, a constant must be defined before it
-is used: the scope is not mutual, as in a module body.
-Furthermore, unlike in lin and oper definitions, it is not possible
-to bind variables on the left of the equality sign.
-
-
-
Function applications in concrete syntax
-
-
-
-
-Fully compiled concrete syntax may not include expressions of function types
-except on the outermost level of lin rules, as defined here.
-However,
-in the source code, and especially in oper definitions, functions
-are the main vehicle of code reuse and abstraction. Thus function types and
-functions follow the same rules as in abstract syntax, as specified
-here. In
-particular, the application of a lambda abstract is computed by beta conversion.
-
-
-To ensure the elimination of functions, GF uses a special computation rule
-for pushing function applications inside tables, since otherwise run-time
-variables could block their applications:
-
-Also parameter constructors with non-empty contexts, as defined
-here,
-result in expressions in application form. These expressions are never
-a problem if their arguments are just constructors, because they can then
-be translated to integers corresponding to the position of the expression
-in the enumeration of the values of its type.
-However, a constructor
-applied to a run-time variable may need to be converted as follows:
-
-C...x... ==> case x of {_ => C...x}
-
-The resulting expression, when processed by table expansion as explained
-here,
-results in C being applied to just values of the type of x, and the
-application thereby disappears.
-
-
-
Reusing top-level grammars as resources
-
-
-
-
-This section is valid for GF 3.0, which abandons the "lock field"
-discipline of GF 2.8.
-
-
-As explained here,
-abstract syntax modules can be opened as interfaces
-and concrete syntaxes as their instances. This means that judgements are,
-as it were, translated in the following way:
-
-
-
catCG ===> operC : Type
-
funf : T ===> operf : T
-
lincatC = T ===> operC : Type = C
-
linf = t ===> operf = t
-
-
-
-Notice that the value T of lincat definitions is not disclosed
-in the translation. This means that the type C remains abstract: the
-only ways of building an object of type C are the operations f
-obtained from fun and lin rules.
-
-
-The purpose of keeping linearization types abstract is to enforce
-grammar checking via type checking. This means that any well-typed
-operation application is also well-typed in the sense of the original
-grammar. If the types were disclosed, then we could for instance easily
-confuse all categories that have the linearization
-type {s : Str}. Yet another reason is that revealing the types
-makes it impossible for the library programmers to change their type
-definitions afterwards.
-
-
-Library writers may occasionally want to have access to the values of
-linearization types. The way to make it possible is to add an extra
-construction operation to a module in which the linearization type
-is available:
-
-
- oper MkC : T -> C = \x -> x
-
-
-In object-oriented terms, the type C itself is protected, whereas
-MkC is a public constructor of C. Of course, it is possible to
-make these constructors overloaded (concept explained here),
-to enable easy access to special cases.
-
-
-
Predefined concrete syntax types
-
-
-
-
-The following concrete syntax types are predefined:
-
-
-
Str, the type of tokens and token lists (defined here)
-
Integer, the type of nonnegative integers
-
Intsn, the type of integers from 0 to n
-
Type, the type of (concrete syntax) types
-
PType, the type of parameter types
-
-
-
-The last two types are, in a way, extended by user-written grammars,
-since new parameter types can be defined in the way shown here,
-and every parameter type is also a type. From the point of view of the values
-of expressions, however, a param declaration does not extend
-PType, since all parameter types get compiled to initial
-segments of integers.
-
-
-Notice the difference between the concrete syntax types
-Str and Integer on the one hand, and the abstract
-syntax categories String and Int, on the other.
-As concrete syntax types, the latter are treated in
-the same way as any reused categories: their objects
-can be formed by using syntax trees (string and integer
-literals).
-
-
-The type name Integer replaces in GF 3.0 the name Int,
-to avoid confusion with the abstract syntax type and to be analogous
-with the Str vs. String distinction.
-
-
-
Predefined concrete syntax operations
-
-The following predefined operations are defined in the resource module
-prelude/Predef.gf. Their implementations are defined as
-a part of the GF grammar compiler.
-
-
-
-
operation
-
type
-
explanation
-
-
-
PBool
-
PType
-
PTrue | PFalse
-
-
-
Error
-
Type
-
the empty type
-
-
-
Int
-
Type
-
the type of integers
-
-
-
Ints
-
Integer -> Type
-
the type of integers from 0 to n
-
-
-
error
-
Str -> Error
-
forms error message
-
-
-
length
-
Str -> Int
-
length of string
-
-
-
drop
-
Integer -> Str -> Str
-
drop prefix of length
-
-
-
take
-
Integer -> Str -> Str
-
take prefix of length
-
-
-
tk
-
Integer -> Str -> Str
-
drop suffix of length
-
-
-
dp
-
Integer -> Str -> Str
-
take suffix of length
-
-
-
eqInt
-
Integer -> Integer -> PBool
-
test if equal integers
-
-
-
lessInt
-
Integer -> Integer -> PBool
-
test order of integers
-
-
-
plus
-
Integer -> Integer -> Integer
-
add integers
-
-
-
eqStr
-
Str -> Str -> PBool
-
test if equal strings
-
-
-
occur
-
Str -> Str -> PBool
-
test if occurs as substring
-
-
-
occurs
-
Str -> Str -> PBool
-
test if any char occurs
-
-
-
show
-
(P : Type) -> P -> Str
-
convert param to string
-
-
-
read
-
(P : Type) -> Str -> P
-
convert string to param
-
-
-
toStr
-
(L : Type) -> L -> Str
-
find the "first" string
-
-
-
nonExist
-
Str
-
this is a special token marking
-non-existing morphological forms
-
-
-
BIND
-
Str
-
this is a special token marking
-that the surrounding tokens should not
-be separated by space
-
-
-
SOFT_BIND
-
Str
-
-this is a special token marking
-that the surrounding tokens need not
-be separated by space
-
-
-
-
-
-Compilation eliminates these operations, and they may therefore not
-take arguments that depend on run-time variables.
-
-
-The module Predef is included in the opens list of all
-modules, and therefore does not need to be opened explicitly.
-
-
-
Flags and pragmas
-
-
Some flags and their values
-
-
-
-
-The flag coding in concrete syntax sets the character encoding
-used in the grammar. Internally, GF uses unicode, and .pgf files
-are always written in UTF8 encoding. The presence of the flag
-coding=utf8 prevents GF from encoding an already encoded
-file.
-
-
-
-
-
-The flag startcat in abstract syntax sets the default start category for
-parsing, random generation, and any other grammar operation that depends
-on category. Its legal values are the categories defined or inherited in
-the abstract syntax.
-
-
-
-
-
-
-
Compiler pragmas
-
-Compiler pragmas are a special form of comments prefixed with --#.
-Currently GF interprets the following pragmas.
-
-in the top of FILE.gf causes the GF compiler, when invoked on FILE.gf,
-to search through the current directory (.) and the directories
-present, prelude, and /home/aarne/GF/tmp, in this order.
-If a directory DIR is not found relative to the working directory,
-$(GF_LIB_PATH)/DIR is searched. $GF_LIB_PATH
-can be a colon-separated list of directories, in which case each directory
-in the list contributes to the search path expansion.
-
-
-
-
Alternative grammar input formats
-
-While the GF language as specified in this document is the most versatile
-and powerful way of writing GF grammars, there are several other formats
-that a GF compiler may make available for users, either to get started
-with small grammars or to semiautomatically convert grammars from other
-formats to GF. Here are the ones supported by GF 2.8 and 3.0.
-
-
-
Old GF without modules
-
-
-
-
-Before GF compiler version 2.0, there was no module system, and
-all kinds of judgement could be written in all files, without
-any headers. This format is still available, and the compiler
-(version 2.8) detects automatically if a file is in the current
-or the old format. However, the old format is not recommended
-because of pure modularity and missing separate compilation,
-and also because libraries are not available, since the old
-and the new format cannot be mixed. With version 2.8, grammars
-in the old format can be converted to modular grammar with the
-command
-
-
- > import -o FILE.gf
-
-
-which rewrites the grammar divided into three files:
-an abstract, a concrete, and a resource module.
-
-
-
Context-free grammars
-
-A quick way to write a GF grammar is to use the context-free format,
-also known as BNF. Files of this form are recognized by the suffix
-.cf. Rules in these files have the form
-
-Label.Cat::= (String | Cat)* ;
-
-where Label and Cat are identifiers and String quoted strings.
-
-
-There is a shortcut form generating labels automatically,
-
-Cat::= (String | Cat)* ;
-
-In the shortcut form, vertical bars (|) can be used to give
-several right-hand-sides at a time. An empty right-hand side
-means the singleton of an empty sequence, and not an empty union.
-
-
-Just like old-style GF files (previous section), contex-free grammar
-files can be converted to modular GF by using the -o option to
-the compiler in GF 2.8.
-
-
-
Extended BNF grammars
-
-Extended BNF (FILE.ebnf)
-goes one step further from the shortcut notation of previous section.
-The rules have the form
-
-Cat::=RHS;
-
-where an RHS can be any regular expression
-built from quoted strings and category symbols, in the following ways:
-
-
-
-
RHS item
-
explanation
-
-
-
Cat
-
nonterminal
-
-
-
String
-
terminal
-
-
-
RHSRHS
-
sequence
-
-
-
RHS|RHS
-
alternatives
-
-
-
RHS?
-
optional
-
-
-
RHS*
-
repetition
-
-
-
RHS+
-
non-empty repetition|
-
-
-
-
-
-Parentheses are used to override standard precedences, where
-| binds weaker than sequencing, which binds weaker than the unary operations.
-
-
-The compiler generates not only labels, but also new categories corresponding
-to the regular expression combinations actually in use.
-
-
-Just like .cf files (previous section), .ebnf
-files can be converted to modular GF by using the -o option to
-the compiler in GF 2.8.
-
-
-
Example-based grammars
-
-Example-based grammars (.gfe) provide a way to use
-resource grammar libraries without having to know the names
-of functions in them. The compiler works as a preprocessor,
-saving the result in a (.gf) file, which can be compiled
-as usual.
-
-
-If a library is implemented as an abstract and concrete syntax,
-it can be used for parsing. Calls of library functions can therefore
-be formed by parsing strings in the library. GF has an expression
-format for this,
-
-inCString
-
-where C is the category in which to parse (it can be qualified by
-the module name) and the string is the input to parser. Expressions
-of this form are replaced by the syntax trees that result. These
-trees are always type-correct. If several parses are found, all but
-the first one are given in comments.
-
-
-Here is an example, from GF/examples/animal/:
-
-
- --# -resource=../../lib/present/LangEng.gfc
- --# -path=.:present:prelude
-
- incomplete concrete QuestionsI of Questions = open Lang in {
- lincat
- Phrase = Phr ;
- Entity = N ;
- Action = V2 ;
- lin
- Who love_V2 man_N = in Phr "who loves men" ;
- Whom man_N love_V2 = in Phr "whom does the man love" ;
- Answer woman_N love_V2 man_N = in Phr "the woman loves men" ;
- }
-
-
-The resource pragma shows the grammar that is used for parsing
-the examples.
-
-
-Notice that the variables love_V2, man_N, etc, are
-actually constants in the library. In the resulting rules, such as
-
-those constants are nonetheless treated as variables, following
-the normal binding conventions, as stated here.
-
-
-
The grammar of GF
-
-The following grammar is actually used in the parser of GF, although we have
-omitted
-some obsolete rules still included in the parser for backward compatibility
-reasons.
-
-
-This document was automatically generated by the BNF-Converter. It was generated together with the lexer, the parser, and the abstract syntax module, which guarantees that the document matches with the implementation of the language (provided no hand-hacking has taken place).
-
-
-
The lexical structure of GF
-
-
Identifiers
-
-Identifiers Ident are unquoted strings beginning with a letter,
-followed by any combination of letters, digits, and the characters _ '
-reserved words excluded.
-
-
-
Literals
-
-Integer literals Integer are nonempty sequences of digits.
-
-
-String literals String have the form
-"x"}, where x is any sequence of any characters
-except " unless preceded by \.
-
-
-Double-precision float literals Double have the structure
-indicated by the regular expression digit+ '.' digit+ ('e' ('-')? digit+)? i.e.\
-two sequences of digits separated by a decimal point, optionally
-followed by an unsigned or negative exponent.
-
-
-
Reserved words and symbols
-
-The set of reserved words is the set of terminals appearing in the grammar. Those reserved words that consist of non-letter characters are called symbols, and they are treated in a different way from those that are similar to identifiers. The lexer follows rules familiar from languages like Haskell, C, and Java, including longest match and spacing conventions.
-
-
-The reserved words used in GF are the following:
-
-
-
-
PType
-
Str
-
Strs
-
Type
-
-
-
abstract
-
case
-
cat
-
concrete
-
-
-
data
-
def
-
flags
-
fun
-
-
-
in
-
incomplete
-
instance
-
interface
-
-
-
let
-
lin
-
lincat
-
lindef
-
-
-
linref
-
of
-
open
-
oper
-
-
-
param
-
pre
-
printname
-
resource
-
-
-
strs
-
table
-
transfer
-
variants
-
-
-
where
-
with
-
-
-
-
-
-
-
-The symbols used in GF are the following:
-
-
-
-
;
-
=
-
:
-
->
-
-
-
{
-
}
-
**
-
,
-
-
-
(
-
)
-
[
-
]
-
-
-
-
-
.
-
|
-
?
-
-
-
<
-
>
-
@
-
!
-
-
-
*
-
+
-
++
-
\
-
-
-
=>
-
_
-
$
-
/
-
-
-
-
-
-
Comments
-
-Single-line comments begin with --.Multiple-line comments are enclosed with {- and -}.
-
-
-
The syntactic structure of GF
-
-Non-terminals are enclosed between < and >.
-The symbols -> (production), | (union)
-and eps (empty rule) belong to the BNF notation.
-All other symbols are terminals.
-
-
-
-
Grammar
-
->
-
[ModDef]
-
-
-
[ModDef]
-
->
-
eps
-
-
-
-
|
-
ModDef[ModDef]
-
-
-
ModDef
-
->
-
ModDef;
-
-
-
-
|
-
ComplModModType=ModBody
-
-
-
ModType
-
->
-
abstractIdent
-
-
-
-
|
-
resourceIdent
-
-
-
-
|
-
interfaceIdent
-
-
-
-
|
-
concreteIdentofIdent
-
-
-
-
|
-
instanceIdentofIdent
-
-
-
-
|
-
transferIdent:Open->Open
-
-
-
ModBody
-
->
-
ExtendOpens{[TopDef]}
-
-
-
-
|
-
[Included]
-
-
-
-
|
-
Includedwith[Open]
-
-
-
-
|
-
Includedwith[Open]**Opens{[TopDef]}
-
-
-
-
|
-
[Included]**Includedwith[Open]
-
-
-
-
|
-
[Included]**Includedwith[Open]**Opens{[TopDef]}
-
-
-
[TopDef]
-
->
-
eps
-
-
-
-
|
-
TopDef[TopDef]
-
-
-
Extend
-
->
-
[Included]**
-
-
-
-
|
-
eps
-
-
-
[Open]
-
->
-
eps
-
-
-
-
|
-
Open
-
-
-
-
|
-
Open,[Open]
-
-
-
Opens
-
->
-
eps
-
-
-
-
|
-
open[Open]in
-
-
-
Open
-
->
-
Ident
-
-
-
-
|
-
(QualOpenIdent)
-
-
-
-
|
-
(QualOpenIdent=Ident)
-
-
-
ComplMod
-
->
-
eps
-
-
-
-
|
-
incomplete
-
-
-
QualOpen
-
->
-
eps
-
-
-
[Included]
-
->
-
eps
-
-
-
-
|
-
Included
-
-
-
-
|
-
Included,[Included]
-
-
-
Included
-
->
-
Ident
-
-
-
-
|
-
Ident[[Ident]]
-
-
-
-
|
-
Ident-[[Ident]]
-
-
-
Def
-
->
-
[Name]:Exp
-
-
-
-
|
-
[Name]=Exp
-
-
-
-
|
-
Name[Patt]=Exp
-
-
-
-
|
-
[Name]:Exp=Exp
-
-
-
TopDef
-
->
-
cat[CatDef]
-
-
-
-
|
-
fun[FunDef]
-
-
-
-
|
-
data[FunDef]
-
-
-
-
|
-
def[Def]
-
-
-
-
|
-
data[DataDef]
-
-
-
-
|
-
param[ParDef]
-
-
-
-
|
-
oper[Def]
-
-
-
-
|
-
lincat[PrintDef]
-
-
-
-
|
-
lindef[Def]
-
-
-
-
|
-
linref[Def]
-
-
-
-
|
-
lin[Def]
-
-
-
-
|
-
printnamecat[PrintDef]
-
-
-
-
|
-
printnamefun[PrintDef]
-
-
-
-
|
-
flags[FlagDef]
-
-
-
CatDef
-
->
-
Ident[DDecl]
-
-
-
-
|
-
[Ident[DDecl]]
-
-
-
-
|
-
[Ident[DDecl]]{Integer}
-
-
-
FunDef
-
->
-
[Ident]:Exp
-
-
-
DataDef
-
->
-
Ident=[DataConstr]
-
-
-
DataConstr
-
->
-
Ident
-
-
-
-
|
-
Ident.Ident
-
-
-
[DataConstr]
-
->
-
eps
-
-
-
-
|
-
DataConstr
-
-
-
-
|
-
DataConstr|[DataConstr]
-
-
-
ParDef
-
->
-
Ident=[ParConstr]
-
-
-
-
|
-
Ident=(inIdent)
-
-
-
-
|
-
Ident
-
-
-
ParConstr
-
->
-
Ident[DDecl]
-
-
-
PrintDef
-
->
-
[Name]=Exp
-
-
-
FlagDef
-
->
-
Ident=Ident
-
-
-
[Def]
-
->
-
Def;
-
-
-
-
|
-
Def;[Def]
-
-
-
[CatDef]
-
->
-
CatDef;
-
-
-
-
|
-
CatDef;[CatDef]
-
-
-
[FunDef]
-
->
-
FunDef;
-
-
-
-
|
-
FunDef;[FunDef]
-
-
-
[DataDef]
-
->
-
DataDef;
-
-
-
-
|
-
DataDef;[DataDef]
-
-
-
[ParDef]
-
->
-
ParDef;
-
-
-
-
|
-
ParDef;[ParDef]
-
-
-
[PrintDef]
-
->
-
PrintDef;
-
-
-
-
|
-
PrintDef;[PrintDef]
-
-
-
[FlagDef]
-
->
-
FlagDef;
-
-
-
-
|
-
FlagDef;[FlagDef]
-
-
-
[ParConstr]
-
->
-
eps
-
-
-
-
|
-
ParConstr
-
-
-
-
|
-
ParConstr|[ParConstr]
-
-
-
[Ident]
-
->
-
Ident
-
-
-
-
|
-
Ident,[Ident]
-
-
-
Name
-
->
-
Ident
-
-
-
-
|
-
[Ident]
-
-
-
[Name]
-
->
-
Name
-
-
-
-
|
-
Name,[Name]
-
-
-
LocDef
-
->
-
[Ident]:Exp
-
-
-
-
|
-
[Ident]=Exp
-
-
-
-
|
-
[Ident]:Exp=Exp
-
-
-
[LocDef]
-
->
-
eps
-
-
-
-
|
-
LocDef
-
-
-
-
|
-
LocDef;[LocDef]
-
-
-
Exp6
-
->
-
Ident
-
-
-
-
|
-
Sort
-
-
-
-
|
-
String
-
-
-
-
|
-
Integer
-
-
-
-
|
-
Double
-
-
-
-
|
-
?
-
-
-
-
|
-
[]
-
-
-
-
|
-
data
-
-
-
-
|
-
[IdentExps]
-
-
-
-
|
-
[String]
-
-
-
-
|
-
{[LocDef]}
-
-
-
-
|
-
<[TupleComp]>
-
-
-
-
|
-
<Exp:Exp>
-
-
-
-
|
-
(Exp)
-
-
-
Exp5
-
->
-
Exp5.Label
-
-
-
-
|
-
Exp6
-
-
-
Exp4
-
->
-
Exp4Exp5
-
-
-
-
|
-
table{[Case]}
-
-
-
-
|
-
tableExp6{[Case]}
-
-
-
-
|
-
tableExp6[[Exp]]
-
-
-
-
|
-
caseExpof{[Case]}
-
-
-
-
|
-
variants{[Exp]}
-
-
-
-
|
-
pre{Exp;[Altern]}
-
-
-
-
|
-
strs{[Exp]}
-
-
-
-
|
-
Ident@Exp6
-
-
-
-
|
-
Exp5
-
-
-
Exp3
-
->
-
Exp3!Exp4
-
-
-
-
|
-
Exp3*Exp4
-
-
-
-
|
-
Exp3**Exp4
-
-
-
-
|
-
Exp4
-
-
-
Exp1
-
->
-
Exp2+Exp1
-
-
-
-
|
-
Exp2
-
-
-
Exp
-
->
-
Exp1++Exp
-
-
-
-
|
-
\[Bind]->Exp
-
-
-
-
|
-
\\[Bind]=>Exp
-
-
-
-
|
-
Decl->Exp
-
-
-
-
|
-
Exp3=>Exp
-
-
-
-
|
-
let{[LocDef]}inExp
-
-
-
-
|
-
let[LocDef]inExp
-
-
-
-
|
-
Exp3where{[LocDef]}
-
-
-
-
|
-
inExp5String
-
-
-
-
|
-
Exp1
-
-
-
Exp2
-
->
-
Exp3
-
-
-
[Exp]
-
->
-
eps
-
-
-
-
|
-
Exp
-
-
-
-
|
-
Exp;[Exp]
-
-
-
Exps
-
->
-
eps
-
-
-
-
|
-
Exp6Exps
-
-
-
Patt2
-
->
-
_
-
-
-
-
|
-
Ident
-
-
-
-
|
-
Ident.Ident
-
-
-
-
|
-
Integer
-
-
-
-
|
-
Double
-
-
-
-
|
-
String
-
-
-
-
|
-
{[PattAss]}
-
-
-
-
|
-
<[PattTupleComp]>
-
-
-
-
|
-
(Patt)
-
-
-
Patt1
-
->
-
Ident[Patt]
-
-
-
-
|
-
Ident.Ident[Patt]
-
-
-
-
|
-
Patt2*
-
-
-
-
|
-
Ident@Patt2
-
-
-
-
|
-
-Patt2
-
-
-
-
|
-
Patt2
-
-
-
Patt
-
->
-
Patt|Patt1
-
-
-
-
|
-
Patt+Patt1
-
-
-
-
|
-
Patt1
-
-
-
PattAss
-
->
-
[Ident]=Patt
-
-
-
Label
-
->
-
Ident
-
-
-
-
|
-
$Integer
-
-
-
Sort
-
->
-
Type
-
-
-
-
|
-
PType
-
-
-
-
|
-
Str
-
-
-
-
|
-
Strs
-
-
-
[PattAss]
-
->
-
eps
-
-
-
-
|
-
PattAss
-
-
-
-
|
-
PattAss;[PattAss]
-
-
-
[Patt]
-
->
-
Patt2
-
-
-
-
|
-
Patt2[Patt]
-
-
-
Bind
-
->
-
Ident
-
-
-
-
|
-
_
-
-
-
[Bind]
-
->
-
eps
-
-
-
-
|
-
Bind
-
-
-
-
|
-
Bind,[Bind]
-
-
-
Decl
-
->
-
([Bind]:Exp)
-
-
-
-
|
-
Exp4
-
-
-
TupleComp
-
->
-
Exp
-
-
-
PattTupleComp
-
->
-
Patt
-
-
-
[TupleComp]
-
->
-
eps
-
-
-
-
|
-
TupleComp
-
-
-
-
|
-
TupleComp,[TupleComp]
-
-
-
[PattTupleComp]
-
->
-
eps
-
-
-
-
|
-
PattTupleComp
-
-
-
-
|
-
PattTupleComp,[PattTupleComp]
-
-
-
Case
-
->
-
Patt=>Exp
-
-
-
[Case]
-
->
-
Case
-
-
-
-
|
-
Case;[Case]
-
-
-
Altern
-
->
-
Exp/Exp
-
-
-
[Altern]
-
->
-
eps
-
-
-
-
|
-
Altern
-
-
-
-
|
-
Altern;[Altern]
-
-
-
DDecl
-
->
-
([Bind]:Exp)
-
-
-
-
|
-
Exp6
-
-
-
[DDecl]
-
->
-
eps
-
-
-
-
|
-
DDecl[DDecl]
-
-
-
-
-
-
diff --git a/doc/gf-refman.md b/doc/gf-refman.md
new file mode 100644
index 000000000..2a53041d9
--- /dev/null
+++ b/doc/gf-refman.md
@@ -0,0 +1,2770 @@
+---
+title: GF Language Reference Manual
+author: Aarne Ranta, Krasimir Angelov
+date: June 2014, GF 3.6
+show-toc: true
+---
+
+This document is a reference manual to the GF programming language. GF,
+Grammatical Framework, is a special-purpose programming language,
+designed to support definitions of grammars.
+
+This document is not an introduction to GF; such an introduction can be
+found in the GF tutorial available online on the GF web page,
+
+[`grammaticalframework.org`](http://grammaticalframework.org)
+
+This manual covers only the language, not the GF compiler or interactive
+system. We will however make some references to different compiler
+versions, if they involve changes of behaviour having to do with the
+language specification.
+
+This manual is meant to be fully compatible with GF version 3.0. Main
+discrepancies with version 2.8 are indicated, as well as with the
+reference article on GF,
+
+A. Ranta, \"Grammatical Framework. A Type Theoretical Grammar
+Formalism\", *The Journal of Functional Programming* 14(2), 2004, pp.
+145-189.
+
+This article will be referred to as \"the JFP article\".
+
+As metalinguistic notation, we will use the symbols
+
+- *a* === *b* to say that *a* is syntactic sugar for *b*
+- *a* ==\> *b* to say that *a* is computed (or compiled) to *b*
+
+
+Overview of GF
+--------------
+
+GF is a typed functional language, borrowing many of its constructs from
+ML and Haskell: algebraic datatypes, higher-order functions, pattern
+matching. The module system bears resemblance to ML (functors) but also
+to object-oriented languages (inheritance). The type theory used in the
+abstract syntax part of GF is inherited from logical frameworks, in
+particular ALF (\"Another Logical Framework\"; in a sense, GF is Yet
+Another ALF). From ALF comes also the use of dependent types, including
+the use of explicit type variables instead of Hindley-Milner
+polymorphism.
+
+The look and feel of GF is close to Java and C, due to the use of curly
+brackets and semicolons in structuring the code; the expression syntax,
+however, follows Haskell in using juxtaposition for function application
+and parentheses only for grouping.
+
+To understand the constructs of GF, and especially their limitations in
+comparison to general-purpose programming languages, it is essential to
+keep in mind that GF is a special-purpose and non-turing-complete
+language. Every GF program is ultimately compiled to a **multilingual
+grammar**, which consists of an **abstract syntax** and a set of
+**concrete syntaxes**. The abstract syntax defines a system of **syntax
+trees**, and each concrete syntax defines a mapping from those syntax
+trees to **nested tuples** of strings and integers. This mapping is
+**compositional**, i.e. **homomorphic**, and moreover **reversible**:
+given a nested tuple, there exists an effective way of finding the set
+of syntax trees that map to this tuple. The procedure of applying the
+mapping to a tree to produce a tuple is called **linearization**, and
+the reverse search procedure is called **parsing**. It is ultimately the
+requirement of reversibility that restricts GF to be less than
+turing-complete. This is reflected in restrictions to recursion in
+concrete syntax. Tree formation in abstract syntax, however, is fully
+recursive.
+
+Even though run-time GF grammars manipulate just nested tuples, at
+compile time these are represented by the more fine-grained labelled
+records and finite functions over algebraic datatypes. This enables the
+programmer to write on a higher abstraction level, and also adds type
+distinctions and hence raises the level of checking of programs.
+
+
+The module system
+-----------------
+
+
+### Top-level and supplementary module structure
+
+The big picture of GF as a programming language for multilingual
+grammars explains its principal module structure. Any GF grammar must
+have an abstract syntax module; it can in addition have any number of
+concrete syntax modules matching that abstract syntax. Before going to
+details, we give a simple example: a module defining the **category**
+`A` of adjectives and one adjective-forming **function**, the zero-place
+function `Even`. We give the module the name `Adj`. The GF code for the
+module looks as follows:
+
+ abstract Adj = {
+ cat A ;
+ fun Even : A ;
+ }
+
+Here are two concrete syntax modules, one intended for mapping the trees
+to English, the other to Swedish. The mapping is defined by `lincat`
+definitions assigning a **linearization type** to each category, and
+`lin` definitions assigning a **linearization** to each function.
+
+ concrete AdjEng of Adj = {
+ lincat A = {s : Str} ;
+ lin Even = {s = "even"} ;
+ }
+
+ concrete AdjSwe of Adj = {
+ lincat A = {s : AForm => Str} ;
+ lin Even = {s = table {
+ ASg Utr => "jämn" ;
+ ASg Neutr => "jämnt" ;
+ APl => "jämna"
+ }
+ } ;
+ param AForm = ASg Gender | APl ;
+ param Gender = Utr | Neutr ;
+ }
+
+These examples illustrate the main ideas of multilingual grammars:
+
+- the concrete syntax must match the abstract syntax:
+ - every `cat` is given a `lincat`
+ - every `fun` is given a `lin`
+
+
+
+- the concrete syntax is internally coherent:
+ - the `lin` rules respect the types defined by `lincat` rules
+
+
+
+- concrete syntaxes are independent of each other
+ - they can use different `lincat` and `lin` definitions
+ - they can define their own **parameter types** (`param`)
+
+The first two ideas form the core of the **static checking** of GF
+grammars, eliminating the possibility of run-time errors in
+linearization and parsing. The third idea gives GF the expressive power
+needed to map abstract syntax to vastly different languages.
+
+Abstract and concrete modules are called **top-level grammar modules**,
+since they are the ones that remain in grammar systems at run time.
+However, in order to support **modular grammar engineering**, GF
+provides much more module structure than strictly required in top-level
+grammars.
+
+**Inheritance**, also known as **extension**, means that a module can
+inherit the contents of one or more other modules to which new
+judgements are added, e.g.
+
+ abstract MoreAdj = Adj ** {
+ fun Odd : A ;
+ }
+
+**Resource modules** define parameter types and **operations** usable in
+several concrete syntaxes,
+
+ resource MorphoFre = {
+ param Number = Sg | Pl ;
+ param Gender = Masc | Fem ;
+ oper regA : Str -> {s : Gender => Number => Str} =
+ \fin -> {
+ s = table {
+ Masc => table {Sg => fin ; Pl => fin + "s"} ;
+ Fem => table {Sg => fin + "e" ; Pl => fin + "es"}
+ }
+ } ;
+ }
+
+By **opening**, a module can use the contents of a resource module
+without inheriting them, e.g.
+
+ concrete AdjFre of Adj = open MorphoFre in {
+ lincat A = {s : Gender => Number => Str} ;
+ lin Even = regA "pair" ;
+ }
+
+**Interfaces** and **instances** separate the contents of a resource
+module to type signatures and definitions, in a way analogous to
+abstract vs. concrete modules, e.g.
+
+ interface Lexicon = {
+ oper Adjective : Type ;
+ oper even_A : Adjective ;
+ }
+
+ instance LexiconEng of Lexicon = {
+ oper Adjective = {s : Str} ;
+ oper even_A = {s = "even"} ;
+ }
+
+**Functors** i.e. **parametrized modules** i.e. **incomplete modules**,
+defining a concrete syntax in terms of an interface.
+
+ incomplete concrete AdjI of Adj = open Lexicon in {
+ lincat A = Adjective ;
+ lin Even = even_A ;
+ }
+
+A functor can be **instantiated** by providing instances of its open
+interfaces.
+
+ concrete AdjEng of Adj = AdjI with (Lexicon = LexiconEng) ;
+
+
+### Compilation units
+
+The compilation unit of GF source code is a file that contains a module.
+Judgements outside modules are supported only for backward
+compatibility, as explained [here](#oldgf). Every source file, suffixed
+`.gf`, is compiled to a \"GF object file\", suffixed `.gfo` (as of GF
+Version 3.0 and later). For runtime grammar objects used for parsing and
+linearization, a set of `.gfo` files is linked to a single file suffixed
+`.pgf`. While `.gf` and `.gfo` files may contain modules of any kinds, a
+`.pgf` file always contains a multilingual grammar with one abstract and
+a set of concrete syntaxes.
+
+The following diagram summarizes the files involved in the compilation
+process.
+
+`module1.gf module2.gf ... modulen.gf`
+
+==\>
+
+`module1.gfo module2.gfo ... modulen.gfo`
+
+==\>
+
+grammar.pgf
+
+Both `.gf` and `.gfo` files are written in the GF source language;
+`.pgf` files are written in a lower-level format. The process of
+translating `.gf` to `.gfo` consists of **name resolution**, **type
+annotation**, **partial evaluation**, and **optimization**. There is a
+great advantage in the possibility to do this separately for GF modules
+and saving the result in `.gfo` files. The partial evaluation phase, in
+particular, is time and memory consuming, and GF libraries are therefore
+distributed in `.gfo` to make their use less arduous.
+
+*In GF before version 3.0, the object files are in a format called
+`.gfc`,* *and the multilingual runtime grammar is in a format called
+`.gfcm`.*
+
+The standard compiler has a built-in **make facility**, which finds out
+what other modules are needed when compiling an explicitly given module.
+This facility builds a dependency graph and decides which of the
+involved modules need recompilation (from `.gf` to `.gfo`), and for
+which the GF object can be used directly.
+
+
+### Names
+
+Each module *M* defines a set of **names**, which are visible in *M*
+itself, in all modules extending *M* (unless excluded, as explained
+[here](#restrictedinheritance)), and all modules opening *M*. These
+names can stand for abstract syntax categories and functions, parameter
+types and parameter constructors, and operations. All these names live
+in the same **name space**, which means that a name entering a module
+more than once due to inheritance or opening can lead to a **conflict**.
+It is specified [here](#renaming) how these conflicts are resolved.
+
+The names of modules live in a name space separate from the other names.
+Even here, all names must be distinct in a set of files compiled to a
+multilingual grammar. In particular, even files residing in different
+directories must have different names, since GF has no notion of
+hierarchic module names.
+
+Lexically, names belong to the class of **identifiers**. An identifier
+is a letter followed by any number of letters, digits, underscores (`_`)
+and primes (`'`). Upper- and lower-case letters are treated as distinct.
+Nothing dictates the choice of upper or lower-case initials, but the
+standard libraries follow conventions similar to Haskell:
+
+- upper case is used for modules, abstract syntax categories and
+ functions, parameter types and constructors, and type synonyms
+- lower case is used for non-type-valued operations and for variables
+
+[]{#identifiers}
+
+\"Letters\" as mentioned in the identifier syntax include all 7-bit
+ASCII letters. Iso-latin-1 and Unicode letters are supported in varying
+degrees by different tools and platforms, and are hence not recommended
+in identifiers.
+
+
+### The structure of a module
+
+Modules of all types have the following structure:
+
+*moduletype* *name* `=` *extends* *opens* *body*
+
+The part of the module preceding the body is its **header**. The header
+defines the type of the module and tells what other modules it inherits
+and opens. The body consists of the judgements that introduce all the
+new names defined by the module.
+
+Any of the parts *extends*, *opens*, and *body* may be empty. If they
+are all filled, delimiters and keywords separate the parts in the
+following way:
+
+*moduletype* *name* `=` *extends* `**` `open` *opens* `in` `{` *body*
+`}`
+
+The part *moduletype* *name* looks slightly different if the type is
+`concrete` or `instance`: the *name* intrudes between the type keyword
+and the name of the module being implemented and which really belongs to
+the type of the module:
+
+`concrete` *name* `of` *abstractname*
+
+The only exception to the schema of functor syntax is functor
+instantiations: the instantiation list is given in a special way between
+*extends* and *opens*:
+
+`incomplete concrete` *name* `of` *abstractname* `=` *extends* `**`
+*functorname* `with` *instantiations* `**` `open` *opens* `in` `{`
+*body* `}`
+
+Logically, the part \"*functorname* `with` *instantiations*\" should
+really be one of the *extends*. This is also shown by the fact that it
+can have restricted inheritance (concept defined
+[here](#restrictedinheritance)).
+
+
+### Module types, headers, and bodies
+
+The *extends* and *opens* parts of a module header are lists of module
+names (with possible qualifications, as defined below
+[here](#qualifiednames)). The first step of type checking a module
+consists of verifying that these names stand for modules of appropriate
+module types. As a rule of thumb,
+
+- the *extends* of a module must have the same *moduletype*
+- the *opens* of a module must be of type `resource`
+
+However, the precise rules are a little more fine-grained, because of
+the presence of interfaces and their instances, and the possibility to
+reuse abstract and concrete modules as resources. The following table
+gives, for all module types, the possible module types of their
+*extends* and *opens*, as well as the forms of judgement legal in that
+module type.
+
+ module type extends opens body
+ --------------------------- ------------ ------------ ----------------------------
+ `abstract` abstract \- `cat, fun, def, data`
+  `concrete of` *abstract*   concrete     resource\*   `lincat, lin, oper, param`
+ `resource` resource\* resource\* `oper, param`
+ `interface` resource+ resource\* `oper, param`
+ `instance of` *interface* resource\* resource\* `oper, param`
+  `incomplete` concrete      concrete+    resource+    `lincat, lin, oper, param`
+
+The table uses the following shorthands for lists of module types:
+
+- resource\*: resource, instance, concrete
+- resource+: resource\*, interface, abstract
+- concrete+: concrete, incomplete concrete
+
+The legality of judgements in the body is checked before the judgements
+themselves are checked.
+
+The forms of judgement are explained [here](#judgementforms).
+
+
+### Digression: the logic of module types
+
+Why are the legality conditions of opens and extends so complicated? The
+best way to grasp them is probably to consider a simplified logical
+model of the module system, replacing modules by types and functions.
+This model could actually be developed towards treating modules in GF as
+first-class objects; so far, however, this step has not been motivated
+by any practical needs.
+
+ module object and type
+ ------------------------------------------ -----------------------
+ abstract A = B A = B : type
+ concrete C of A = B C = B : A -\> S
+ interface I = B I = B : type
+ instance J of I = B J = B : I
+ incomplete concrete C of A = open I in B C = B : I -\> A -\> S
+ concrete K of A = C with (I=J) K = B(J) : A -\> S
+ resource R = B R = B : I
+ concrete C of A = open R in B C = B(R) : A -\> S
+
+A further step of defining modules as first-class objects would use
+GADTs and record types:
+
+- an abstract syntax is a Generalized Algebraic Datatype (GADT)
+- the target type `S` of concrete syntax is the type of nested tuples
+ over strings and integers
+- an interface is a labelled record type
+- an instance is a record of the type defined by the interface
+- a functor, with a module body opening an interface, is a function on
+ its instances
+- the instantiation of a functor is an application of the function to
+ some instance
+- a resource is a typed labelled record, putting together an interface
+ and an instance of it
+- the body of a module opening a resource is as a function on the
+ interface implicit in the resource; this function is immediately
+ applied to the instance defined in the resource
+
+Slightly unexpectedly, interfaces and instances are easier to understand
+in this way than resources - a resource is, indeed, more complex, since
+it fuses together an interface and an instance.
+
+[]{#openabstract}
+
+When an abstract is used as an interface and a concrete as its instance,
+they are actually reinterpreted so that they match the model. Then the
+abstract is no longer a GADT, but a system of *abstract* datatypes, with
+a record field of type `Type` for each category, and a function among
+these types for each abstract syntax function. A concrete syntax
+instantiates this record with linearization types and linearizations.
+
+
+### Inheritance
+
+After checking that the *extends* of a module are of appropriate module
+types, the compiler adds the inherited judgements to the judgements
+included in the body. The inherited judgements are not copied entirely,
+but their names with links to the inherited module. Conflicts may arise
+in this process: a name can have two definitions in the combined pool of
+inherited and added judgements. Such a conflict is always an error: GF
+provides no way to redefine an inherited constant.
+
+Simple as the definition of a conflict may sound, it has to take care of
+the inheritance hierarchy. A very common pattern of inheritance is the
+**diamond**: inheritance from two modules which themselves inherit a
+common base module. Assume that the base module defines a name `f`:
+
+ N
+ / \
+ M1 M2
+ \ /
+ Base {f}
+
+Now, `N` inherits `f` from both `M1` and `M2`, so is there a conflict?
+The answer in GF is *no*, because the \"two\" `f`\'s are in the end the
+same: the one defined in `Base`. The situation is thus simpler than in
+**multiple inheritance** in languages like C++, because definitions in
+GF are **immutable**: neither `M1` nor `M2` can possibly have changed
+the definition of `f` given in `Base`. In practice, the compiler manages
+inheritance through hierarchy in a very simple way, by just always
+creating a link not to the immediate parent, but the original ancestor;
+this ancestor can be read from the link provided by the immediate
+parent. Here is how links are created from source modules by the
+compiler:
+
+ Base {f}
+ M1 {m1} ===> M1 {Base.f, m1}
+ M2 {m2} ===> M2 {Base.f, m2}
+ N {n} ===> N {Base.f, M1.m1, M2.m2, n}
+
+[]{#restrictedinheritance}
+
+Inheritance can be **restricted**. This means that a module can be
+specified as inheriting *only* explicitly listed constants, or all
+constants *except* ones explicitly listed. The syntax uses constant
+names in brackets, prefixed by a minus sign in the case of an exclusion
+list. In the following configuration, N inherits `a,b,c` from `M1`, and
+all names but `d` from `M2`
+
+    N = M1 [a,b,c], M2-[d]
+
+Restrictions are performed as a part of inheritance linking, module by
+module: the link is created for a constant if and only if it is both
+included in the module and compatible with the restriction. Thus, for
+instance, an inadvertent usage can exclude a constant from one module
+but inherit it from another one. In the following configuration, `f` is
+inherited via `M1`, if `M1` inherits it.
+
+ N = M1 [a,b,c], M2-[f]
+
+Unintended inheritance may cause problems later in compilation, in the
+judgement-level dependency analysis phase. For instance, suppose a
+function `f` has category `C` as its type in `M`, and we only include
+`f`. The exclusion has the effect of creating an ill-formed module:
+
+ abstract M = {cat C ; fun f : C ;}
+ M [f] ===> {fun f : C ;}
+
+One might expect inheritance restriction to be transitive: if an
+included constant *b* depends on some other constant *a*, then *a*
+should be included automatically. However, this rule would lead to
+hard-to-detect inheritances. And it could only be applied later in the
+compilation phase, when the compiler has not only collected the names
+defined, but also resolved the names used in definitions.
+
+Yet another pitfall with restricted inheritance is that it must be
+stated for each module separately. For instance, a concrete syntax of an
+abstract must exclude all those names that the abstract does, and a
+functor instantiation must replicate all restrictions of the functor.
+
+
+### Opening
+
+Opening makes constants from other modules usable in judgements, without
+inheriting them. This means that, unlike inheritance, opening is not
+transitive.
+
+[]{#qualifiednames}
+
+Opening cannot be restricted as inheritance can, but it can be
+**qualified**. This means that the names from the opened modules cannot
+be used as such, but only as prefixed by a qualifier and a dot (`.`).
+The qualifier can be any identifier, including the name of the module.
+Here is an example of an *opens* list:
+
+ open A, (X = XSLTS), (Y = XSLTS), B
+
+If `A` defines the constant `a`, it can be accessed by the names
+
+ a A.a
+
+If `XSLTS` defines the constant `x`, it can be accessed by the names
+
+ X.x Y.x XSLTS.x
+
+Thus qualification by real module name is always possible, and one and
+the same module can be qualified in different ways at the same time (the
+latter can be useful if you want to be able to change the
+implementations of some constants to a different resource later). Since
+the qualification with real module name is always possible, it is not
+possible to \"swap\" the names of modules locally:
+
+ open (A=B), (B=A) -- NOT POSSIBLE!
+
+The list of qualifier names and module names in a module header may
+thus not contain any duplicates.
+
+
+### Name resolution
+
+[]{#renaming}
+
+**Name resolution** is the compiler phase taking place after inheritance
+linking. It qualifies all names occurring in the definition parts of
+judgements (that is, just excluding the defined names themselves) with
+the names of the modules they come from. If a name can come from
+different modules (that is, not from their common ancestor), a conflict
+is reported; this decision is hence not dependent on e.g. types, which
+are known only at a later phase.
+
+Qualification of names is the main device for avoiding conflicts in name
+resolution. No other information is used, such as priorities between
+modules. However, if a name is defined in different opened modules but
+never used in the module body, a conflict does not arise: conflicts
+arise only when names are used. Also in this respect, opening is thus
+different from inheritance, where conflicts are checked independently of
+use.
+
+As usual, inner scope has priority in name resolution. This means that
+if an identifier is in scope as a bound variable, it will not be
+interpreted as a constant, unless qualified by a module name (variable
+bindings are explained [here](#variablebinding)).
+
+
+### Functor instantiations
+
+We have dealt with the principles of module headers, inheritance, and
+names in a general way that applies to all module types. The exception
+is functor instantiations, that have an extra part of the instantiating
+equations, assigning an instance to every interface. Here is a typical
+example, displaying the full generality:
+
+ concrete FoodsEng of Foods = PhrasesEng **
+ FoodsI-[Pizza] with
+ (Syntax = SyntaxEng),
+ (LexFoods = LexFoodsEng) **
+ open SyntaxEng, ParadigmsEng in {
+ lin Pizza = mkCN (mkA "Italian") (mkN "pie") ;
+ }
+
+(The example is modified from Section 5.9 in the GF Tutorial.)
+
+The instantiation syntax is similar to qualified *opens*. The
+left-hand-side names must be interfaces, the right-hand-side names their
+instances. (Recall that `abstract` can be used as `interface` and
+`concrete` as its `instance`.) Inheritance from the functor can be
+restricted, typically for the purpose of defining some excluded functions
+in language-specific ways in the module body.
+
+
+### Completeness
+
+(This section refers to the forms of judgement introduced
+[here](#judgementforms).)
+
+A `concrete` is complete with respect to an `abstract`, if it contains a
+`lincat` definition for every `cat` declaration, and a `lin` definition
+for every `fun` declaration.
+
+The same completeness criterion applies to functor instantiations. It is
+not possible to use a partial functor instantiation, leading to another
+functor.
+
+Functors do not need to be complete in the sense concrete modules need.
+The missing definitions can then be provided in the body of each functor
+instantiation.
+
+A `resource` is complete, if all its `oper` and `param` judgements have
+a definition part. While a `resource` must be complete, an `interface`
+need not. For an `interface`, the definition parts of judgements
+are optional.
+
+An `instance` is complete with respect to an `interface`, if it gives
+the definition parts of all `oper` and `param` judgements that are
+omitted in the `interface`. Giving definitions to judgements that have
+already been defined in the `interface` is illegal. Type signatures, on
+the other hand, can be repeated if the same types are used.
+
+In addition to completing the definitions in an `interface`, its
+instance may contain other judgements, but these must all be complete
+with definitions.
+
+Here is an example of an instance and its interface showing the above
+variations:
+
+ interface Pos = {
+ param Case ; -- no definition
+ param Number = Sg | Pl ; -- definition given
+ oper Noun : Type = { -- relative definition given
+ s : Number => Case => Str
+ } ;
+ oper regNoun : Str -> Noun ; -- no definition
+ }
+
+ instance PosEng of Pos = {
+ param Case = Nom | Gen ; -- definition of Case
+ -- Number and Noun inherited
+ oper regNoun = \dog -> { -- type of regNoun inherited
+ s = table { -- definition of regNoun
+ Sg => table {
+ Nom => dog
+ -- etc
+ }
+      }
+    } ;
+ oper house_N : Noun = -- new definition
+ regNoun "house" ;
+ }
+
+
+Judgements
+----------
+
+
+### Overview of the forms of judgement
+
+[]{#judgementforms}
+
+A module body in GF is a set of **judgements**. Judgements are
+definitions or declarations, sometimes combinations of the two; the
+common feature is that every judgement introduces a name, which is
+available in the module and whenever the module is extended or opened.
+
+There are several different **forms of judgement**, identified by
+different **judgement keywords**. Here is a list of all these forms,
+together with syntax descriptions and the types of modules in which each
+form can occur. The table moreover indicates whether the judgement has a
+default value, and whether it contributes to the **name base**, i.e.
+introduces a new name to the scope.
+
+ judgement where module default base
+ ----------------------------- ---------------------------- ------------ --------- ------
+ `cat` C G G context abstract N/A yes
+ `fun` f : A A type abstract N/A yes
+ `def` f ps = t f fun, ps patterns, t term abstract yes no
+ `data` C = f `|` \... `|` g C cat, f\...g fun abstract yes no
+ `lincat` C = T C cat, T type concrete\* yes yes
+ `lin` f = t f fun, t term concrete\* no yes
+ `lindef` C = t C cat, t term concrete\* yes no
+ `linref` C = t C cat, t term concrete\* yes no
+ `printname cat` C = t C cat, t term concrete\* yes no
+ `printname fun` f = t f fun, t term concrete\* yes no
+ `param` P = C`|` \... `|` D C\...D constructors resource\* N/A yes
+ `oper` f : T = t T type, t term resource\* N/A yes
+ `flags` o = v o flag, v value all yes N/A
+
+Judgements that have default values are rarely used, except `lincat` and
+`flags`, which often need values different from the defaults.
+
+Introducing a name twice in the same module is an error. In other words,
+all judgements that have a \"yes\" in the name base column, must have
+distinct identifiers on their left-hand sides.
+
+All judgements end with semicolons (`;`).
+
+In addition to the syntax given in the table, many of the forms have
+syntactic sugar. This sugar will be explained below in connection to
+each form. There are moreover two kinds of syntactic sugar common to all
+forms:
+
+- the judgement keyword is shared between consecutive judgements until
+ a new keyword appears:
+ `keyw J ; K ;` === `keyw J ; keyw K ;`
+- the right-hand sides of colon (`:`) and equality (`=`) can be
+ shared, by using comma (`,`) as separator of left-hand sides, which
+ must consist of identifiers
+ `c,d : T` === `c : T ; d : T ;`
+ `c,d = t` === `c = t ; d = t ;`
+
+These conventions, like all syntactic sugar, are performed at an early
+compilation phase, directly after parsing. This means that e.g.
+
+ lin f,g = \x -> x ;
+
+can be correct even though `f` and `g` require different function
+types.
+
+Within a module, judgements can occur in any order. In particular, a
+name can be used before it is introduced.
+
+The explanations of judgement forms refer to the notions of **type** and
+**term** (the latter also called **expression**). These notions will be
+explained in detail [here](#expressions).
+
+
+### Category declarations, cat
+
+[]{#catjudgements}
+
+Category declarations
+
+`cat` *C* *G*
+
+define the **basic types** of abstract syntax. A basic type is formed
+from a category by giving values to all variables in the **context**
+*G*. If the context is empty, the basic type looks the same as the
+category itself. Otherwise, application syntax is used:
+
+*C* *a*~1~\...*a*~n~
+
+
+### Hypotheses and contexts
+
+[]{#contexts}
+
+A context is a sequence of **hypotheses**, i.e. variable-type pairs. A
+hypothesis is written
+
+`(` *x* `:` *T* `)`
+
+and a sequence does not have any separator symbols. As syntactic sugar,
+
+- variables can share a type,
+ `(` *x,y* `:` *T* `)` === `(` *x* `:` *T* `)` `(` *y* `:` *T* `)`
+- a **wildcard** can be used for a variable not occurring in types
+ later in the context,
+ `(` `_` `:` *T* `)` === `(` *x* `:` *T* `)`
+- if the variable does not occur later, it can be omitted altogether,
+ and parentheses are not used,
+ *T* === `(` *x* `:` *T* `)`
+ But if *T* is more complex than an identifier, it needs parentheses
+ to be separated from the rest of the context.
+
+An abstract syntax has **dependent types**, if any of its categories has
+a non-empty context.
+
+
+### Function declarations, fun
+
+Function declarations,
+
+`fun` *f* `:` *T*
+
+define the **syntactic constructors** of abstract syntax. The type *T*
+of *f* is built from basic types (formed from categories) by using
+the function type constructor `->`. Thus its form is
+
+(*x*~1~ `:` *A*~1~) `->` \... `->` (*x*~n~ `:` *A*~n~) `->` *B*
+
+where *Ai* are types, called the **argument types**, and *B* is a basic
+type, called the **value type** of *f*. The **value category** of *f* is
+the category that forms the type *B*.
+
+A **syntax tree** is formed from *f* by applying it to a full list of
+arguments, so that the result is of a basic type.
+
+A **higher-order function** is one that has a function type as an
+argument. The concrete syntax of GF does not support displaying the
+bound variables of functions of higher than second order, but they are
+legal in abstract syntax.
+
+An abstract syntax is **context-free**, if it has neither dependent
+types nor higher-order functions. Grammars with context-free abstract
+syntax are an important subclass of GF, with more limited complexity
+than full GF. Whether the *concrete* syntax is context-free in the sense
+of the Chomsky hierarchy is independent of the context-freeness of the
+abstract syntax.
+
+
+### Function definitions, def
+
+Function definitions,
+
+`def` *f* *p*~1~ \... *p*~n~ `=` *t*
+
+where *f* is a `fun` function and *p*~i~ are patterns, impose a
+relation of **definitional equality** on abstract syntax trees. They
+form the basis of **computation**, which is used when comparing whether
+two types are equal; this notion is relevant only if the types are
+dependent. Computation can also be used for the **normalization** of
+syntax trees, which applies even in context-free abstract syntax.
+
+The set of `def` definitions for *f* can be scattered around the module
+in which *f* is introduced as a function. The compiler builds the set of
+pattern equations in the order in which the equations appear; this order
+is significant in the case of overlapping patterns. All equations must
+appear in the same module in which *f* itself is declared.
+
+The syntax of patterns will be specified [here](#patternmatching),
+commonly for abstract and concrete syntax. In abstract syntax,
+**constructor patterns** are those of the form
+
+*C* *p*~1~ \... *p*~n~
+
+where *C* is declared as `data` for some abstract syntax category (see
+next section). A **variable pattern** is either an identifier or a
+wildcard.
+
+A common pitfall is to forget to declare a constructor as data, which
+causes it to be interpreted as a variable pattern in definitions.
+
+Computation is performed by applying definitions and beta conversions,
+and in general by using **pattern matching**. Computation and pattern
+matching are explained commonly for abstract and concrete syntax
+[here](#patternmatching).
+
+In contrast to concrete syntax, abstract syntax computation is
+completely **symbolic**: it does not produce a value, but just another
+term. Hence it is not an error to have incomplete systems of pattern
+equations for a function. In addition, the definitions can be
+**recursive**, which means that computation can fail to terminate; this
+can never happen in concrete syntax.
+
+
+### Data constructor definitions, data
+
+A data constructor definition,
+
+`data` *C* `=` *f*~1~ `|` \... `|` *f*~n~
+
+defines the functions *f1*\...*fn* to be **constructors** of the
+category *C*. This means that they are recognized as constructor
+patterns when used in function definitions.
+
+In order for the data constructor definition to be correct,
+*f*~1~\...*f*~n~ must be functions with *C* as their value category.
+
+The complete set of constructors for a category *C* is the union of all
+its data constructor definitions. Thus a category can be \"extended\" by
+new constructors afterwards. However, all these constructor definitions
+must appear in the same module in which the category is itself defined.
+
+There is syntactic sugar for declaring a function as a constructor at
+the same time as introducing it:
+
+`data` *f* : *A*~1~ `->` \... `->` *A*~n~ `->` *C* *t*~1~ \... *t*~m~
+
+===
+
+`fun` *f* : *A*~1~ `->` \... `->` *A*~n~ `->` *C* *t*~1~ \... *t*~m~ ;
+`data` *C* = *f*
+
+
+### The semantic status of an abstract syntax function
+
+There are three possible statuses for a function declared in a `fun`
+judgement:
+
+- primitive notion: the default status
+- constructor: the function appears on the right-hand side in `data`
+ judgement
+- defined: the function has a `def` definition
+
+The \"constructor\" and \"defined\" statuses are in contradiction with
+each other, whereas the primitive notion status is overridden by any of
+the two others.
+
+This distinction is relevant for the semantics of abstract syntax, not
+for concrete syntax. It shows in the way patterns are treated in
+equations in `def` definitions: a constructor in a pattern matches only
+itself, whereas any other name is treated as a variable pattern, which
+matches anything.
+
+
+### Linearization type definitions, lincat
+
+A linearization type definition,
+
+`lincat` *C* `=` *T*
+
+defines the type of linearizations of trees whose type has category *C*.
+Type dependences have no effect on the linearization type.
+
+The type *T* must be a **legal linearization type**, which means that it
+is a *record type* whose fields have either parameter types, the type
+Str of strings, or table or record types of these. In particular,
+function types may not appear in *T*. A detailed explanation of types in
+concrete syntax will be given [here](#cnctypes).
+
+If *K* is the concrete syntax of an abstract syntax *A*, then *K* must
+define the linearization type of all categories declared in *A*.
+However, the definition can be omitted from the source code, in which
+case the default type `{s : Str}` is used.
+
+
+### Linearization definitions, lin
+
+A linearization definition,
+
+`lin` *f* `=` *t*
+
+defines the linearization function of function *f*, i.e. the function
+used for linearizing trees formed by *f*.
+
+The type of *t* must be the homomorphic image of the type of *f*. In
+other words, if
+
+`fun` *f* `:` *A*~1~ `->` \... `->` *A*~n~ `->` *A*
+
+then
+
+`lin` *f* `:` *A*~1~\* `->` \... `->` *A*~n~\* `->` *A*\*
+
+where the type *T*\* is defined as follows depending on *T*:
+
+- (*C* *t*~1~ \... *t*~n~)\* = *T*, if `lincat` *C* `=` *T*
+- (*B*~1~ `->` \... `->` *B*~m~ `->` *B*)\* = *B*\*
+ `** {$0,...,$m : Str}`
+
+The second case is relevant for higher-order functions only. It says
+that the linearization type of the value type is extended by adding a
+string field for each argument type; these fields store the variable
+symbol used for the binding of each variable.
+
+[]{#HOAS}
+
+Since the arguments of a function argument are treated as bare strings,
+orders higher than the second are irrelevant for concrete syntax.
+
+There is syntactic sugar for binding the variables of the linearization
+of a function on the left-hand side:
+
+`lin` *f* *p* `=` *t* === `lin` *f* `= \`*p* `->` *t*
+
+The pattern *p* must be either a variable or a wildcard (`_`); this is
+what the syntax of lambda abstracts (`\p -> t`) requires.
+
+
+### Linearization default definitions, lindef
+
+[]{#lindefjudgements}
+
+A linearization default definition,
+
+`lindef` *C* `=` *t*
+
+defines the default linearization of category *C*, i.e. the function
+applicable to a string to make it into an object of the linearization
+type of *C*.
+
+Linearization defaults are invoked when linearizing variable bindings in
+higher-order abstract syntax. A variable symbol is then presented as a
+string, which must be converted to correct type in order for the
+linearization not to fail with an error.
+
+The other use of the defaults is for linearizing metavariables and
+abstract functions without linearization in the concrete syntax. In the
+first case the default linearization is applied to the string `"?X"`
+where `X` is the unique index of the metavariable, and in the second
+case the string is `"[f]"` where `f` is the name of the abstract
+function with missing linearization.
+
+Usually, linearization defaults are generated by using the default rule
+that \"uses the symbol itself for every string, and the first value of
+the parameter type for every parameter\". The precise definition is by
+structural recursion on the type:
+
+- default(Str,s) = s
+- default(P,s) = \#1(P)
+- default(P =\> T,s) = `\\_ =>` default(T,s)
+- default(`{`\... ; r : R ; \...`}`,s) = `{`\... ; r : default(R,s) ;
+ \...`}`
+
+The notion of the first value of a parameter type (\#1(P)) is defined
+[below](#paramvalues).
+
+
+### Linearization reference definitions, linref
+
+[]{#linrefjudgements}
+
+A linearization reference definition,
+
+`linref` *C* `=` *t*
+
+defines the reference linearization of category *C*, i.e. the function
+applicable to an object of the linearization type of *C* to make it into
+a string.
+
+The reference linearization is always applied to the top-level node of
+the abstract syntax tree. For example when we linearize the tree
+`f x1 x2 .. xn`, then we first apply `f` to its arguments which gives us
+an object of the linearization type of its category. After that we apply
+the reference linearization for the same category to get a string out of
+the object. This is particularly useful when the linearization type of
+*C* contains discontinuous constituents. In this case usually the
+reference linearization glues the constituents together to produce an
+intuitive linearization string.
+
+The reference linearization is also used for linearizing metavariables
+which stand in function position. For example the tree
+`f (? x1 x2 .. xn)` is linearized as follows. Each of the arguments
+`x1 x2 .. xn` is linearized, and after that the reference linearization
+of its category is applied to the output of the linearization. The
+result is a sequence of `n` strings which are concatenated into a single
+string. The final string is the input to the default linearization of
+the category for the argument of `f`. After applying the default
+linearization we get an object that we could safely pass to `f`.
+
+Usually, linearization references are generated by using the rule that
+\"picks the first string in the linearization type\". The precise
+definition is by structural recursion on the type:
+
+- reference(Str,o) = Just o
+- reference(P,s) = Nothing
+- reference(P =\> T,o) = reference(T,o ! \#1(P)) \|\| reference(T,o !
+ \#2(P)) \|\| \... \|\| reference(T,o ! \#n(P))
+- reference({r1 : R1; \... rn : Rn},o) = reference(R1, o.r1) \|\|
+ reference(R2, o.r2) \|\| \... \|\| reference(Rn, o.rn)
+
+Here each call to reference returns either `(Just o)` or `Nothing`. When
+we compute the reference for a table or a record then we pick the
+reference for the first expression for which the recursive call gives us
+`Just`. If we get `Nothing` for all of them then the final result is
+`Nothing` too.
+
+
+### Printname definitions, printname cat and printname fun
+
+A category printname definition,
+
+`printname cat` *C* `=` *s*
+
+defines the printname of category *C*, i.e. the name used in some
+abstract syntax information shown to the user.
+
+Likewise, a function printname definition,
+
+`printname fun` *f* `=` *s*
+
+defines the printname of function *f*, i.e. the name used in some
+abstract syntax information shown to the user.
+
+The most common use of printnames is in the interactive syntax editor,
+where printnames are displayed in menus. It is possible e.g. to adapt
+them to each language, or to embed HTML tooltips in them (as is used in
+some HTML-based editor GUIs).
+
+Usually, printnames are generated automatically from the symbol and/or
+concrete syntax information.
+
+
+### Parameter type definitions, param
+
+[]{#paramjudgements}
+
+A parameter type definition,
+
+`param` *P* `=` *C*~1~ *G*~1~ `|` \... `|` *C*~n~ *G*~n~
+
+defines a parameter type *P* with the **parameter constructors**
+*C*~1~\...*C*~n~, with their respective contexts *G*~1~\...*G*~n~.
+
+[]{#paramtypes}
+
+Contexts have the same syntax as in `cat` judgements, explained
+[here](#catjudgements). Since dependent types are not available in
+parameter type definitions, the use of variables is never necessary. The
+types in the context must themselves be **parameter types**, which are
+defined as follows:
+
+- Given the judgement `param` *P* \..., *P* is a parameter type.
+- A record type of parameter types is a parameter type.
+- `Ints` *n* (an initial segment of integers) is a parameter type.
+
+The names defined by a parameter type definition include both the type
+name *P* and the constructor names *C*~i~. Therefore all these names
+must be distinct in a module.
+
+A parameter type may not be recursive, i.e. *P* itself may not occur in
+the contexts of its constructors. This restriction extends to mutual
+recursion: we say that *P* **depends** on the types that occur in the
+contexts of its constructors and on all types that those types depend
+on, and state that *P* may not depend on itself.
+
+In an `interface module`, it is possible to declare a parameter type
+without defining it,
+
+`param` *P* `;`
+
+
+### Parameter values
+
+[]{#paramvalues}
+
+All parameter types are finite, and the GF compiler will internally
+compute them to **lists of parameter values**. These lists are formed by
+traversing the `param` definitions, usually respecting the order of
+constructors in the source code. For records, lexicographical sorting is
+applied. However, both the order of traversal of `param` definitions and
+the order of fields in a record are specified in a compiler-internal
+way, which means that the programmer should not rely on any particular
+order.
+
+The order of the list of parameter values can affect the program in two
+cases:
+
+- in the default `lindef` definition ([here](#lindefjudgements)), the
+ first value is chosen
+- in course-of-value tables ([here](#tables)), the compiler-internal
+ order is followed
+
+The first usage implies that, if `lindef` definitions are essential for
+the application, they should be given manually. The second usage implies
+that course-of-value tables should be avoided in hand-written GF code.
+
+In run-time grammar generation, all parameter values are translated to
+integers denoting positions in these parameter lists.
+
+
+### Operation definitions, oper
+
+An operation definition,
+
+`oper` *h* `:` *T* `=` *t*
+
+defines an **operation** *h* of type *T*, with the computation rule
+
+*h* ==\> *t*
+
+The type *T* can be any concrete syntax type, including function types
+of any order. The term *t* must have the type *T*, as defined
+[here](#expressions).
+
+As syntactic sugar, the type can be omitted,
+
+`oper` *h* `=` *t*
+
+which works in two cases
+
+- the type can be inferred from *t* (compiler-dependent)
+- the definition occurs in an `instance` and the type is given in the
+ `interface`
+
+It is also possible to give the type and the definition separately:
+
+`oper` *h* `:` *T* ; `oper` *h* `=` *t* === `oper` *h* `:` *T* `=` *t*
+
+The order of the type part and the definition part is free, and there
+can be other judgements in between. However, they must occur in the same
+`resource` module for it to be complete (as defined
+[here](#completeness)). In an `interface` module, it is enough to give
+the type.
+
+When only the definition is given, it is possible to use a shorthand
+similar to `lin` judgements:
+
+`oper` *h* *p* `=` *t* === `oper` *h* `=` `\`*p* `->` *t*
+
+The pattern *p* is either a variable or a wildcard (`_`).
+
+Operation definitions may not be recursive, not even mutually recursive.
+This condition ensures that functions can in the end be eliminated from
+concrete syntax code (as explained [here](#functionelimination)).
+
+
+### Operation overloading
+
+[]{#overloading}
+
+One and the same operation name *h* can be used for different
+operations, which have to have different types. For each call of *h*,
+the type checker selects one of these operations depending on what type
+is expected in the context of the call. The syntax of overloaded
+operation definitions is
+
+`oper` *h* `= overload {`*h* : *T*~1~ = *t*~1~ ; \... ; *h* : *T*~n~ =
+*t*~n~`}`
+
+Notice that *h* must be the same in all cases. This format can be used
+to give the complete implementation; to give just the types, e.g. in an
+interface, one can use the form
+
+`oper` *h* `: overload {`*h* : *T*~1~ ; \... ; *h* : *T*~n~`}`
+
+The implementation of this operation typing is given by a judgement of
+the first form. The order of branches need not be the same.
+
+
+### Flag definitions, flags
+
+A flag definition,
+
+`flags` *o* `=` *v*
+
+sets the value of the flag *o*, to be used when compiling or using the
+module.
+
+The flag *o* is an identifier, and the value *v* is either an identifier
+or a quoted string.
+
+Flags are a kind of metadata, which do not strictly belong to the GF
+language. For instance, compilers do not necessarily check the
+consistency of flags, or the meaningfulness of their values. The
+inheritance of flags is not well-defined; the only certain rule is that
+flags set in the module body override the settings from inherited
+modules.
+
+Here are some flags commonly included in grammars.
+
+ flag value description module
+ ------------ -------------------- ---------------------------------- ----------
+ `coding` character encoding encoding used in string literals concrete
+ `startcat` category default target of parsing abstract
+
+The possible values of these flags are specified [here](#flagvalues).
+Note that the `lexer` and `unlexer` flags are deprecated. If you need
+their functionality, you should supply them to GF shell commands
+like so:
+
+ put_string -lextext "страви, напої" | parse
+
+A summary of their possible values can be found at the [GF shell
+reference](http://www.grammaticalframework.org/doc/gf-shell-reference.html).
+
+
+Types and expressions
+---------------------
+
+
+### Overview of expression forms
+
+[]{#expressions}
+
+Like many dependently typed languages, GF makes no syntactic distinction
+between expressions and types. An illegal use of a type as an expression
+or vice versa comes out as a type error. Whether a variable, for
+instance, stands for a type or an expression value, can only be resolved
+from its context of use.
+
+One practical consequence of the common syntax is that global and local
+definitions (`oper` judgements and `let` expressions, respectively) work
+in the same way for types and expressions. Thus it is possible to
+abbreviate a type occurring in a type expression:
+
+ let A = {s : Str ; b : Bool} in A -> A -> A
+
+Type and other expressions have a system of **precedences**. The
+following table summarizes all expression forms, from the highest to the
+lowest precedence. Some expressions are moreover left- or
+right-associative.
+
+ prec expression example explanation
+ --------- ------------------------------------ -----------------------------------
+ 7 `c` constant or variable
+ 7 `Type` the type of types
+ 7 `PType` the type of parameter types
+ 7 `Str` the type of strings/token lists
+ 7 `"foo"` string literal
+ 7 `123` integer literal
+ 7 `0.123` floating point literal
+ 7 `?` metavariable
+ 7 `[]` empty token list
+ 7 `[C a b]` list category
+ 7 `["foo bar"]` token list
+      7         `{s : Str ; n : Num}`              record type
+      7         `{s = "foo" ; n = Sg}`             record
+      7         `<t,s>`                            tuple
+      7         `<t : T>`                          type-annotated expression
+ 6 left `t.r` projection or qualification
+ 5 left `f a` function application
+ 5 `table {Sg => [] ; _ => "xs"}` table
+ 5 `table P [a ; b ; c]` course-of-values table
+ 5 `case n of {Sg => [] ; _ => "xs"}` case expression
+ 5 `variants {"color" ; "colour"}` free variation
+ 5 `pre {vowel => "an" ; _ => "a"}` prefix-dependent choice
+ 4 left `t ! v` table selection
+ 4 left `A * B` tuple type
+ 4 left `R ** {b : Bool}` record (type) extension
+ 3 left `t + s` token gluing
+ 2 left `t ++ s` token list concatenation
+ 1 right `\x,y -> t` function abstraction (\"lambda\")
+ 1 right `\\x,y => t` table abstraction
+ 1 right `(x : A) -> B` dependent function type
+ 1 right `A -> B` function type
+ 1 right `P => T` table type
+ 1 right `let x = v in t` local definition
+ 1 `t where {x = v}` local definition
+ 1 `in M.C "foo"` rule by example
+
+Any expression in parentheses (`(`*exp*`)`) is in the highest precedence
+class.
+
+
+### The functional fragment: expressions in abstract syntax
+
+[]{#functiontype}
+
+The expression syntax is the same in abstract and concrete syntax,
+although only a part of the syntax is actually usable in well-typed
+expressions in abstract syntax. An abstract syntax is essentially used
+for defining a set of types and a set of functions between those types.
+Therefore it needs essentially the **functional fragment** of the
+syntax. This fragment comprises two kinds of types:
+
+- **basic types**, of form *C a1\...an* where
+ - `cat` *C* (*x*~1~ : *A*~1~)\...(*x*~n~ : *A*~n~), including the
+ predefined categories `Int`, `Float`, and `String` explained
+ [here](#predefabs)
+ - *a*~1~ : *A*~1~,\...,*a*~n~ : *A*~n~{*x*~1~ =
+ *a*~1~,\...,*x*~n-1~=*a*~n-1~}
+
+
+
+- **function types**, of form (*x* : *A*) `->` *B*, where
+ - *A* is a type
+ - *B* is a type possibly depending on *x* : *A*
+
+When defining basic types, we used the notation *t*{*x*~1~ =
+*t*~1~,\...,*x*~n~=*t*~n~} for the **substitution** of values to
+variables. This is a metalevel notation, which denotes a term that is
+formed by replacing the free occurrences of each variable *x*~i~ by
+*t*~i~.
+
+These types have six kinds of expressions:
+
+- **constants**, *f* : *A* where
+ - `fun` *f* : *A*
+
+
+
+- **literals** for integers, floats, and strings (defined in
+ [here](#predefabs))
+
+
+
+- **variables**, *x* : *A* where
+ - *x* has been introduced by a binding
+
+
+
+- **applications**, *f a* : *B*{*x*=*a*}, where
+ - *f* : (*x* : *A*) `->` *B*
+ - *a* : *A*
+
+
+
+- **abstractions**, `\`*x* `->` *b* : (*x* : *A*) `->` *B*, where
+ - *b* : *B* possibly depending on *x* : *A*
+
+
+
+- **metavariables**, `?`, as introduced in intermediate phases of
+ incremental type checking; metavariables are not permitted in GF
+ source code
+
+[]{#variablebinding}
+
+The notion of **binding** is defined for occurrences of variables in
+subexpressions as follows:
+
+- in (*x* : *A*) `->` *B*, *x* is bound in *B*
+- in `\`*x* `->` *b*, *x* is bound in *b*
+- in `def` *f* *p*~1~ \... *p*~n~ = *t*, any pattern variable
+ introduced in any *pi* is bound in *t* (as defined
+ [here](#patternmatching))
+
+As syntactic sugar, function types have sharing of types and suppression
+of variables, in the same way as contexts (defined [here](#contexts)):
+
+- variables can share a type,
+ `(` *x,y* `:` *A* `)` `->` *B* === `(` *x* `:` *A* `) -> (` *y* `:`
+ *A* `) ->` *B*
+- a **wildcard** can be used for a variable not occurring later in the
+ type,
+    `(` `_` `:` *A* `) ->` *B* === `(` *x* `:` *A* `) ->` *B*
+- if the variable does not occur later, it can be omitted altogether,
+ and parentheses are not used,
+ *A* `->` *B* === `(` *\_* `:` *A* `) ->` *B*
+
+There is analogous syntactic sugar for constant functions,
+
+`\`*\_* `->` *t* === `\`*x* `->` *t*
+
+where *x* does not occur in *t*, and for multiple lambda abstractions:
+
+`\`*p,q* `->` *t* === `\`*p* `->` `\`*q* `->` *t*
+
+where *p* and *q* are variables or wild cards (`_`).
+
+
+### Conversions
+
+Among expressions, there is a relation of **definitional equality**
+defined by four **conversion rules**:
+
+- **alpha conversion**: `\`*x* `->` *b* = `\`*y* `->` *b*{*x*=*y*}
+
+
+
+- **beta conversion**: (`\`*x* `->` *b*) *a* = *b*{*x*=*a*}
+
+
+
+- **delta conversion**: *f* *a*~1~ \... *a*~n~ = *tg*, if
+ - there is a definition `def` *f* *p*~1~ \... *p*~n~ = *t*
+ - this definition is the first for *f* that matches the sequence
+        *a*~1~ \... *a*~n~, with the substitution *g*
+
+
+
+- **eta conversion**: *c* = `\`*x* `->` *c x*, if *c* : (*x* : *A*)
+ `->` *B*
+
+Pattern matching substitution used in delta conversion is defined
+[here](#patternmatching).
+
+An expression is in **beta-eta-normal form** if
+
+- it has no subexpressions to which beta conversion applies (beta
+ normality)
+- each constant or variable whose type is a function type must be
+    **eta-expanded**, i.e. made into an abstraction equal to it by eta
+ conversion (eta normality)
+
+Notice that the iteration of eta expansion would lead to an expression
+not in beta-normal form.
+
+
+### Syntax trees
+
+[]{#syntaxtrees}
+
+The **syntax trees** defined by an abstract syntax are well-typed
+expressions of basic types in beta-eta normal form. Linearization
+defined in concrete syntax applies to all and only these expressions.
+
+There is also a direct definition of syntax trees, which does not refer
+to beta and eta conversions: keeping in mind that a type always has the
+form
+
+(*x*~1~ : *A*~1~) `->` \... `->` (*x*~n~ : *A*~n~) `->` *B*
+
+where *Ai* are types and *B* is a basic type, a syntax tree is an
+expression
+
+*b* *t*~1~ \... *t*~n~ : *B\'*
+
+where
+
+- *B\'* is the basic type *B*{*x*~1~ = *t*~1~,\...,*x*~n~ = *t*~n~}
+- `fun` *b* : (*x*~1~ : *A*~1~) `->` \... `->` (*x*~n~ : *A*~n~) `->`
+ *B*
+- each *t*~i~ has the form `\`*z*~1~,\...,*z*~m~ `->` *c* where *A*~i~
+ is
+ (*y*~1~ : *B*~1~) `->` \... `->` (*y*~m~ : *B*~m~) `->` *B*
+
+
+### Predefined types in abstract syntax
+
+[]{#predefabs}
+
+GF provides three predefined categories for abstract syntax, with
+predefined expressions:
+
+ category expressions
+ ---------- ---------------------------------------
+ `Int` integer literals, e.g. `123`
+ `Float` floating point literals, e.g. `12.34`
+ `String` string literals, e.g. `"foo"`
+
+These categories take no arguments, and they can be used as basic types
+in the same way as if they were introduced in `cat` judgements. However,
+it is not legal to define `fun` functions that have any of these types
+as value type: their only well-typed expressions are literals as defined
+in the above table.
+
+
+### Overview of expressions in concrete syntax
+
+[]{#cnctypes}
+
+Concrete syntax is about defining mappings from abstract syntax trees to
+**concrete syntax objects**. These objects comprise
+
+- records
+- tables
+- strings
+- parameter values
+
+Thus functions are not concrete syntax objects; however, the mappings
+themselves are expressed as functions, and the source code of a concrete
+syntax can use functions under the condition that they can be eliminated
+from the final compiled grammar (which they can; this is one of the
+fundamental properties of compilation, as explained in more detail in
+the *JFP* article).
+
+Concrete syntax thus has the same function types and expression forms as
+abstract syntax, specified [here](#functiontype). The basic types
+defined by categories (`cat` judgements) are available via grammar reuse
+explained [here](#reuse); this also comprises the predefined categories
+`Float` and `String`.
+
+
+### Values, canonical forms, and run-time variables
+
+In abstract syntax, the conversion rules given [here](#conversions)
+define a computational relation among expressions, but there is no
+separate notion of a **value** of computation: the value (the end point)
+of a computation chain is simply an expression to which no more
+conversions apply. In general, we are interested in expressions that
+satisfy the conditions of being syntax trees (as defined
+[here](#syntaxtrees)), but there can be many computationally equivalent
+syntax trees which nonetheless are distinct syntax trees and hence have
+different linearizations. The main use of computation in abstract syntax
+is to compare types in dependent type checking.
+
+In concrete syntax, the notion of values is central. At run time, we
+want to compute the values of linearizations; at compile time, we want
+to perform **partial evaluation**, which computes expressions as far as
+possible. To specify what happens in computation we therefore have to
+distinguish between **canonical forms** and other forms of expressions.
+The canonical forms are defined separately for each form of type,
+whereas the other forms may usually produce expressions of any type.
+
+[]{#linexpansion} []{#runtimevariables}
+
+What is done at compile time is the elimination of any noncanonical
+forms, except for those depending on **run-time variables**. Run-time
+variables are the same as the **argument variables** of linearization
+rules, i.e. the variables *x*~1~,\...,*x*~n~ in
+
+`lin` *f* `= \` *x*~1~,\...,*x*~n~ `->` *t*
+
+where
+
+`fun` *f* `:` (*x*~1~ : *A*~1~) `->` \... `->` (*x*~n~ : *A*~n~) `->`
+*B*
+
+Notice that this definition refers to the **eta-expanded** linearization
+term, which has one abstracted variable for each argument type of *f*.
+These variables are not necessarily explicit in GF source code, but
+introduced by the compiler.
+
+Since certain expression forms should be eliminated in compilation but
+cannot be eliminated if run-time variables appear in them, errors can
+appear late in compilation. This is an issue with the following
+expression forms:
+
+- gluing (`s + t`), defined [here](#gluing)
+- pattern matching on strings, defined [here](#patternmatching)
+- predefined string operations, defined [here](#predefcnc) (those
+ taking `Str` arguments)
+
+
+### Token lists, tokens, and strings
+
+[]{#strtype}
+
+The most prominent basic type is `Str`, the type of **token lists**.
+This type is often sloppily referred to as the type of **strings**; but
+it should be kept in mind that the objects of `Str` are *lists* of
+strings rather than single strings.
+
+Expressions of type `Str` have the following canonical forms:
+
+- **tokens**, i.e. **string literals**, in double quotes, e.g. `"foo"`
+- **the empty token list**, `[]`
+- **concatenation**, *s* `++` *t*, where *s,t* : `Str`
+- **prefix-dependent choice**,
+ `pre {p1 => s1 ; ... ; pn => sn ; _ => s }`, where
+ - *s*, *s*~1~,\...,*s*~n~, *p*~1~,\...,*p*~n~ : `Str`
+
+For convenience, the notation is overloaded so that tokens are
+identified with singleton token lists, and there is no separate type of
+tokens (this is a change from the *JFP* article). The notion of a token
+is still important for compilation: all tokens introduced by the grammar
+must be known at compile time. This, in turn, is required by the parsing
+algorithms used for parsing with GF grammars.
+
+In addition to string literals, tokens can be formed by a specific
+non-canonical operator:
+
+- **gluing**, *s* `+` *t*, where *s,t* : `Str`
+
+[]{#gluing}
+
+Being noncanonical, gluing is equipped with a computation rule: string
+literals are glued by forming a new string literal, and empty token
+lists can be ignored:
+
+- `"foo" + "bar"` ==\> `"foobar"`
+- *t* `+ []` ==\> *t*
+- `[] +` *t* ==\> *t*
+
+Since tokens must be known at compile time, the operands of gluing may
+not depend on run-time variables, as defined [here](#runtimevariables).
+
+As syntactic sugar, token lists can be given as bracketed string
+literals, where spaces separate tokens:
+
+- **token lists**, `["one two three"]` === `"one" ++ "two" ++ "three"`
+
+Notice that there are no empty tokens, but the expression `[]` can be
+used in a context requiring a token, in particular in gluing expression
+below. Since `[]` denotes an empty token list, the following computation
+laws are valid:
+
+- *t* `++ []` ==\> *t*
+- `[] ++` *t* ==\> *t*
+
+Moreover, concatenation and gluing are associative:
+
+- s `+` (t `+` u) ==\> s `+` t `+` u
+- s `++` (t `++` u) ==\> s `++` t `++` u
+
+For the programmer, associativity and the empty token laws mean that the
+compiler can use them to simplify string expressions. It also means that
+these laws are respected in pattern matching on strings.
+
+A prime example of prefix-dependent choice operation is the following
+approximative expression for the English indefinite article:
+
+ pre {
+ ("a" | "e" | "i" | "o") => "an" ;
+ _ => "a"
+ } ;
+
+This expression can be computed in the context of a subsequent token:
+
+- `pre {p1 => s1 ; ... ; pn => sn ; _ => s } ++ t` ==\>
+ - *s*~i~ for the first *i* such that the prefix *p*~i~ matches
+ *t*, if it exists
+ - *s* otherwise
+
+The **matching prefix** is defined by comparing the string with the
+prefix of the token. If the prefix is a variant list of strings, then it
+matches the token if any of the strings in the list matches it.
+
+The computation rule can sometimes be applied at compile time, but in
+general, prefix-dependent choices need to be passed to the run-time
+grammar, because they are not given a subsequent token to compare with,
+or because the subsequent token depends on a run-time variable.
+
+The prefix-dependent choice expression itself may not depend on run-time
+variables.
+
+*There is an older syntax for prefix-dependent choice, namely:
+`pre { s ; s1 / p1 ; ... ; sn / pn}`. This syntax will not accept
+strings as patterns.*
+
+*In GF prior to 3.0, a specific type* `Strs` *is used for defining
+prefixes,* *instead of just* `variants` *of* `Str`.
+
+
+### Records and record types
+
+A **record** is a collection of objects of possibly different types,
+accessible by **projections** from the record with **labels** pointing
+to these objects. A record is also itself an object, whose type is a
+**record type**. Record types have the form
+
+`{` *r*~1~ : *A*~1~ `;` \... `;` *r*~n~ : *A*~n~ `}`
+
+where *n* \>= 0, each *A*~i~ is a type, and the labels *r*~i~ are
+distinct. A record of this type has the form
+
+`{` *r*~1~ = *a*~1~ `;` \... `;` *r*~n~ = *a*~n~ `}`
+
+where each *a*~i~ : *A*~i~. A limiting case is the **empty record type**
+`{}`, which has the object `{}`, the **empty record**.
+
+The **fields** of a record type are its parts of the form *r* : *A*,
+also called **typings**. The **fields** of a record are of the form *r*
+= *a*, also called **value assignments**. Value assignments may
+optionally indicate the type, as in *r* : *A* = *a*.
+
+The order of fields in record types and records is insignificant: two
+record types (or records) are equal if they have the same fields, in any
+order, and a record is an object of a record type, if it has
+type-correct value assignments for all fields of the record type. The
+latter definition implies the even stronger principle of **record
+subtyping**: a record can have any type that has some subset of its
+fields. This principle is explained further [here](#subtyping).
+
+All fields in a record must have distinct labels. Thus it is not
+possible e.g. to \"redefine\" a field \"later\" in a record.
+
+Lexically, labels are identifiers (defined [here](#identifiers)). This
+is with the exception of the labels selecting bound variables in the
+linearization of higher-order abstract syntax, which have the form
+`$`*i* for an integer *i*, as specified [here](#HOAS). In source code,
+these labels should not appear in record fields, but only in
+selections.
+
+Labels occur only in syntactic positions where they cannot be confused
+with constants or variables. Therefore it is safe to write, as in
+`Prelude`,
+
+ ss : Str -> {s : Str} = \s -> {s = s} ;
+
+A **projection** is an expression of the form
+
+*t*.*r*
+
+where *t* must be a record and *r* must be a label defined in it. The
+type of the projection is the type of that field. The computation rule
+for projection returns the value assigned to that field:
+
+`{` \... `;` *r* = *a* `;` \... `}.`*r* ==\> *a*
+
+Notice that the dot notation *t*.*r* is also used for qualified names as
+specified [here](#qualifiednames). This ambiguity follows tradition and
+convenience. It is resolved by the following rules (before type
+checking):
+
+1. if *t* is a bound variable or a constant in scope, *t*.*r* is
+ type-checked as a projection
+2. otherwise, *t*.*r* is type-checked as a qualified name
+
+As syntactic sugar, types and values can be shared:
+
+- `{` \... `;` *r,s* : *A* `;` \... `}` === `{` \... `;` *r* : *A* `;`
+ *s* : *A* `;` \... `}`
+- `{` \... `;` *r,s* = *a* `;` \... `}` === `{` \... `;` *r* = *a* `;`
+ *s* = *a* `;` \... `}`
+
+Another syntactic sugar are **tuple types** and **tuples**, which are
+translated by endowing their unlabelled fields by the labels `p1`,
+`p2`,\... in the order of appearance of the fields:
+
+- *A*~1~ `*` \... `*` *A*~n~ === `{` `p1` : *A*~1~ `;` \... `;` `pn` :
+ *A*~n~ `}`
+- `<`*a*~1~ `,` \... `,` *a*~n~ `>` === `{` `p1` = *a*~1~`;` \... `;`
+ `pn` = *a*~n~ `}`
+
+A **record extension** is formed by adding fields to a record or a
+record type. The general syntax involves two expressions,
+
+*R* `**` *S*
+
+The result is a record type or a record with a union of the fields of
+*R* and *S*. It is therefore well-formed if
+
+- both *R* and *S* are either records or record types
+- the labels in *R* and *S* are disjoint, if *R* and *S* are record
+ types
+
+(Since GF version 3.6) If *R* and *S* are record objects, then the
+labels in them need not be disjoint. Labels defined in *S* are then
+given priority, so that record extension in fact works as **record
+update**. A common pattern of using this feature is
+
+ lin F x ... = x ** {r = ... x.r ...}
+
+where `x` is a record with many fields, just one of which is updated.
+Following the normal binding conditions, `x.r` on the right hand side
+still refers to the old value of the `r` field.
+
+
+### Subtyping
+
+The possibility of having superfluous fields in a record forms the basis
+of the **subtyping** relation. That *A* is a subtype of *B* means that
+*a : A* implies *a : B*. This is clearly satisfied for records with
+superfluous fields:
+
+- if *R* is a record type without the label *r*, then *R* `** {` *r* :
+ *A* `}` is a subtype of *R*
+
+The GF grammar compiler extends subtyping to function types by
+**covariance** and **contravariance**:
+
+- covariance: if *A* is a subtype of *B*, then *C* `->` *A* is a
+ subtype of *C* `->` *B*
+- contravariance: if *A* is a subtype of *B*, then *B* `->` *C* is a
+ subtype of *A* `->` *C*
+
+The logic of these rules is natural: if a function returns a value in
+a subtype, then this value is *a fortiori* in the supertype. If a
+function is defined for some type, then it is *a fortiori* defined for
+any subtype.
+
+In addition to the well-known principles of record subtyping and co- and
+contravariance, GF implements subtyping for initial segments of
+integers:
+
+- if *m* \< *n*, then `Ints` *m* is a subtype of `Ints` *n*
+- `Ints` *n* is a subtype of `Integer`
+
+As the last rule, subtyping is transitive:
+
+- if *A* is a subtype of *B* and *B* is a subtype of *C*, then *A* is
+ a subtype of *C*.
+
+
+### Tables and table types
+
+[]{#tables}
+
+One of the most characteristic constructs of GF is **tables**, also
+called **finite functions**. That these functions are finite means that
+it is possible to finitely enumerate all argument-value pairs; this, in
+turn, is possible because the argument types are finite.
+
+A **table type** has the form
+
+*P* `=>` *T*
+
+where *P* must be a parameter type in the sense defined
+[here](#paramtypes), whereas *T* can be any type.
+
+Canonical expressions of table types are **tables**, of the form
+
+`table` `{` *V*~1~ `=>` *t*~1~ ; \... ; *V*~n~ `=>` *t*~n~ `}`
+
+where *V*~1~,\...,*V*~n~ is the complete list of the parameter values of
+the argument type *P* (defined [here](#paramvalues)), and each *t*~i~ is
+an expression of the value type *T*.
+
+In addition to explicit enumerations, tables can be given by **pattern
+matching**,
+
+`table` `{`*p*~1~ `=>` *t*~1~ ; \... ; *p*~m~ `=>` *t*~m~`}`
+
+where *p*~1~,\....,*p*~m~ is a list of patterns that covers all values
+of type *P*. Each pattern *p*~i~ may bind some variables, on which the
+expression *t*~i~ may depend. A complete account of patterns and pattern
+matching is given [here](#patternmatching).
+
+A **course-of-values table** omits the patterns and just lists all
+values. It uses the enumeration of all values of the argument type *P*
+to pair the values with arguments:
+
+`table` *P* `[`*t*~1~ ; \... ; *t*~n~`]`
+
+This format is not recommended for GF source code, since the ordering of
+parameter values is not specified and therefore a compiler-internal
+decision.
+
+The argument type can be indicated in ordinary tables as well, which is
+sometimes helpful for type inference:
+
+`table` *P* `{` \... `}`
+
+The **selection** operator `!`, applied to a table *t* and to an
+expression *v* of its argument type
+
+*t* `!` *v*
+
+returns the first pattern matching result from *t* with *v*, as defined
+[here](#patternmatching). The order of patterns is thus significant as
+long as the patterns contain variables or wildcards. When the compiler
+reorders the patterns following the enumeration of all values of the
+argument type, this order no longer matters, because no overlap remains
+between patterns.
+
+The GF compiler performs **table expansion**, i.e. an analogue of eta
+expansion defined [here](#conversions), where a table is applied to all
+values of its argument type:
+
+*t* : *P* `=>` *T* ==\> `table` *P* `[`*t* `!` *V*~1~ ; \... ; *t* `!`
+*V*~n~`]`
+
+As syntactic sugar, one-branch tables can be written in a way similar to
+lambda abstractions:
+
+`\\`*p* `=>` *t* === `table {`*p* `=>` *t* `}`
+
+where *p* is either a variable or a wildcard (`_`). Multiple bindings
+can be abbreviated:
+
+`\\`*p,q* `=>` *t* === `\\`*p* `=>` `\\`*q* `=>` *t*
+
+**Case expressions** are syntactic sugar for selections:
+
+`case` *e* `of {`\...`}` === `table {`\...`} !` *e*
+
+
+### Pattern matching
+
+[]{#patternmatching}
+
+We will list all forms of patterns that can be used in table branches.
+We define their **variable bindings** and **matching substitutions**.
+
+We start with the patterns available for all parameter types, as well as
+for the types `Integer` and `Str`.
+
+- A constructor pattern *C* *p*~1~\...*p*~n~ binds the union of all
+ variables bound in the subpatterns *p*~1~,\...,*p*~n~. It matches
+    any value *C* *V*~1~\...*V*~n~ where each *p*~i~ matches *V*~i~,
+ and the matching substitution is the union of these substitutions.
+- A record pattern `{` *r*~1~ `=` *p*~1~ `;` \... `;` *r*~n~ `=`
+ *p*~n~ `}` binds the union of all variables bound in the subpatterns
+ *p*~1~,\...,*p*~n~. It matches any value `{` *r*~1~ `=` *V*~1~ `;`
+    \... `;` *r*~n~ `=` *V*~n~ `;` \...`}` where each *p*~i~ matches
+ *V*~i~, and the matching substitution is the union of these
+ substitutions.
+- A variable pattern *x* (identifier other than parameter constructor)
+ binds the variable *x*. It matches any value *V*, with the
+ substitution {*x* = *V*}.
+- The wild card `_` binds no variables. It matches any value, with the
+ empty substitution.
+- A disjunctive pattern *p* `|` *q* binds the intersection of the
+ variables bound by *p* and *q*. It matches anything that either *p*
+ or *q* matches, with the first substitution starting with *p*
+ matches, from which those variables that are not bound by both
+ patterns are removed.
+- A negative pattern `-` *p* binds no variables. It matches anything
+ that *p* does *not* match, with the empty substitution.
+- An alias pattern *x* `@` *p* binds *x* and all the variables bound
+ by *p*. It matches any value *V* that *p* matches, with the same
+ substition extended by {*x* = *V*}.
+
+The following patterns are only available for the type `Str`:
+
+- A string literal pattern, e.g. `"s"`, binds no variables. It matches
+ the same string, with the empty substitution.
+- A concatenation pattern, *p* `+` *q*, binds the union of variables
+ bound by *p* and *q*. It matches any string that consists of a
+ prefix matching *p* and a suffix matching *q*, with the union of
+ substitutions corresponding to the first match (see below).
+- A repetition pattern *p*`*` binds no variables. It matches any
+ string that can be decomposed into strings that match *p*, with the
+ empty substitution.
+
+The following pattern is only available for the types `Integer` and
+`Ints` *n*:
+
+- An integer literal pattern, e.g. `214`, binds no variables. It
+ matches the same integer, with the empty substitution.
+
+All patterns must be **linear**: the same pattern variable may occur
+only once in them. This is what makes it straightforward to speak about
+unions of binding sets and substitutions.
+
+Pattern matching is performed in the order in which the branches appear
+in the source code: the branch of the first matching pattern is
+followed. In concrete syntax, the type checker rejects sets of patterns
+that are not exhaustive, and warns for completely overshadowed patterns.
+It also checks the type correctness of patterns with respect to the
+argument type. In abstract syntax, only type correctness is checked, no
+exhaustiveness or overshadowing.
+
+It follows from the definition of record pattern matching that it can
+utilize partial records: the branch
+
+ {g = Fem} => t
+
+in a table of type `{g : Gender ; n : Number} => T` means the same as
+
+ {g = Fem ; n = _} => t
+
+Variables in regular expression patterns are always bound to the **first
+match**, which is the first in the sequence of binding lists. For
+example:
+
+- `x + "e" + y` matches `"peter"` with `x = "p", y = "ter"`
+- `x + "er"*` matches `"burgerer"` with `x = "burg"`
+
+
+### Free variation
+
+An expression of the form
+
+`variants` `{`*t*~1~ ; \... ; *t*~n~`}`
+
+where all *t*~i~ are of the same type *T*, has itself type *T*. This
+expression presents *t*~1~,\...,*t*~n~ as being in **free variation**:
+the choice between them is not determined by semantics or parameters. A
+limiting case is
+
+`variants {}`
+
+which encodes a rule saying that there is no way to express a certain
+thing, e.g. that a certain inflectional form does not exist.
+
+A common wisdom in linguistics is that \"there is no free variation\",
+which refers to the situation where *all* aspects are taken into
+account. For instance, the English negation contraction could be
+expressed as free variation,
+
+ variants {"don't" ; "do" ++ "not"}
+
+if only semantics is taken into account, but if stylistic aspects are
+included, then the proper formulation might be with a parameter
+distinguishing between informal and formal style:
+
+ case style of {Informal => "don't" ; Formal => "do" ++ "not"}
+
+Since there is no way to choose a particular element from a
+`variants` list, free variation is normally not adequate in
+libraries, nor in grammars meant for natural language generation. In
+application grammars meant to parse user input, free variation is a way
+to avoid cluttering the abstract syntax with semantically insignificant
+distinctions and even to tolerate some grammatical errors.
+
+Permitting `variants` in all types involves a major modification of the
+semantics of GF expressions. All computation rules have to be lifted to
+deal with lists of expressions and values. For instance,
+
+*t* `!` `variants` `{`*t*~1~ ; \... ; *t*~n~`}` ==\> `variants` `{`*t*
+`!` *t*~1~ ; \... ; *t* `!` *t*~n~`}`
+
+This is done in such a way that variation does not distribute to records
+(or other product-like structures). For instance, variants of records,
+
+ variants {{s = "Auto" ; g = Neutr} ; {s = "Wagen" ; g = Masc}}
+
+is *not* the same as a record of variants,
+
+ {s = variants {"Auto" ; "Wagen"} ; g = variants {Neutr ; Masc}}
+
+Variants of variants are flattened,
+
+`variants` `{`\...; `variants` `{`*t*~1~ ;\...; *t*~n~`}` ;\...`}` ==\>
+`variants` `{`\...; *t*~1~ ;\...; *t*~n~ ;\...`}`
+
+and singleton variants are eliminated,
+
+`variants` `{`*t*`}` ==\> *t*
+
+
+### Local definitions
+
+A **local definition**, i.e. a **let expression** has the form
+
+`let` *x* : *T* = *t* `in` *e*
+
+The type of *x* must be *T*, which also has to be the type of *t*.
+Computation is performed by substituting *t* for *x* in *e*:
+
+`let` *x* : *T* = *t* `in` *e* ==\> *e* {*x* = *t*}
+
+As syntactic sugar, the type can be omitted if the type checker is able
+to infer it:
+
+`let` *x* = *t* `in` *e*
+
+It is possible to compress several local definitions into one block:
+
+`let` *x* : *T* = *t* `;` *y* : *U* = *u* `in` *e* === `let` *x* : *T* =
+*t* `in` `let` *y* : *U* = *u* `in` *e*
+
+Another notational variant is a definition block appearing after the
+main expression:
+
+*e* `where` `{`\...`}` === `let` `{`\...`}` `in` *e*
+
+Curly brackets are obligatory in the `where` form, and can also be
+optionally used in the `let` form.
+
+Since a block of definitions is treated as syntactic sugar for a nested
+`let` expression, a constant must be defined before it is used: the
+scope is not mutual, as in a module body. Furthermore, unlike in `lin`
+and `oper` definitions, it is *not* possible to bind variables on the
+left of the equality sign.
+
+
+### Function applications in concrete syntax
+
+[]{#functionelimination}
+
+Fully compiled concrete syntax may not include expressions of function
+types except on the outermost level of `lin` rules, as defined
+[here](#linexpansion). However, in the source code, and especially in
+`oper` definitions, functions are the main vehicle of code reuse and
+abstraction. Thus function types and functions follow the same rules as
+in abstract syntax, as specified [here](#functiontype). In particular,
+the application of a lambda abstract is computed by beta conversion.
+
+To ensure the elimination of functions, GF uses a special computation
+rule for pushing function applications inside tables, since otherwise
+run-time variables could block their applications:
+
+(`table` `{`*p*~1~ `=>` *f*~1~ ; \... ; *p*~n~ `=>` *f*~n~ `}` `!` *e*)
+*a* ==\> `table` `{`*p*~1~ `=>` *f*~1~ *a* ; \... ; *p*~n~ `=>` *f*~n~
+*a*`}` `!` *e*
+
+Also parameter constructors with non-empty contexts, as defined
+[here](#paramjudgements), result in expressions in application form.
+These expressions are never a problem if their arguments are just
+constructors, because they can then be translated to integers
+corresponding to the position of the expression in the enumeration of
+the values of its type. However, a constructor applied to a run-time
+variable may need to be converted as follows:
+
+*C*\...*x*\... ==\> `case` *x* of `{_ =>` *C*\...*x*`}`
+
+The resulting expression, when processed by table expansion as explained
+[here](#tables), results in *C* being applied to just values of the type
+of *x*, and the application thereby disappears.
+
+
+### Reusing top-level grammars as resources
+
+[]{#reuse}
+
+*This section is valid for GF 3.0, which abandons the \"lock field\"*
+*discipline of GF 2.8.*
+
+As explained [here](#openabstract), abstract syntax modules can be
+opened as interfaces and concrete syntaxes as their instances. This
+means that judgements are, as it were, translated in the following way:
+
+- `cat` *C* *G* ===\> `oper` *C* : `Type`
+- `fun` *f* : *T* ===\> `oper` *f* : *T*
+- `lincat` *C* = *T* ===\> `oper` *C* : `Type` = *C*
+- `lin` *f* = *t* ===\> `oper` *f* = *t*
+
+Notice that the value *T* of `lincat` definitions is not disclosed in
+the translation. This means that the type *C* remains abstract: the only
+ways of building an object of type *C* are the operations *f* obtained
+from *fun* and *lin* rules.
+
+The purpose of keeping linearization types abstract is to enforce
+**grammar checking via type checking**. This means that any well-typed
+operation application is also well-typed in the sense of the original
+grammar. If the types were disclosed, then we could for instance easily
+confuse all categories that have the linearization type `{s : Str}`. Yet
+another reason is that revealing the types makes it impossible for the
+library programmers to change their type definitions afterwards.
+
+Library writers may occasionally want to have access to the values of
+linearization types. The way to make it possible is to add an extra
+construction operation to a module in which the linearization type is
+available:
+
+ oper MkC : T -> C = \x -> x
+
+In object-oriented terms, the type *C* itself is **protected**, whereas
+*MkC* is a **public constructor** of *C*. Of course, it is possible to
+make these constructors overloaded (concept explained
+[here](#overloading)), to enable easy access to special cases.
+
+
+### Predefined concrete syntax types
+
+[]{#predefcnc}
+
+The following concrete syntax types are predefined:
+
+- `Str`, the type of tokens and token lists (defined [here](#strtype))
+- `Integer`, the type of nonnegative integers
+- `Ints` *n*, the type of integers from *0* to *n*
+- `Type`, the type of (concrete syntax) types
+- `PType`, the type of parameter types
+
+The last two types are, in a way, extended by user-written grammars,
+since new parameter types can be defined in the way shown
+[here](#paramjudgements), and every parameter type is also a type. From
+the point of view of the values of expressions, however, a `param`
+declaration does not extend `PType`, since all parameter types get
+compiled to initial segments of integers.
+
+Notice the difference between the concrete syntax types `Str` and
+`Integer` on the one hand, and the abstract syntax categories `String`
+and `Int`, on the other. As *concrete syntax* types, the latter are
+treated in the same way as any reused categories: their objects can be
+formed by using syntax trees (string and integer literals).
+
+*The type name* `Integer` *replaces in GF 3.0 the name* `Int`, *to avoid
+confusion with the abstract syntax type and to be analogous* *with the*
+`Str` *vs.* `String` *distinction.*
+
+
+### Predefined concrete syntax operations
+
+The following predefined operations are defined in the resource module
+`prelude/Predef.gf`. Their implementations are defined as a part of the
+GF grammar compiler.
+
+ -------------------------------------------------------------------------------------------------
+ operation type explanation
+ -------------- --------------------------------- ------------------------------------------------
+ `PBool` `PType` `PTrue | PFalse`
+
+ `Error` `Type` the empty type
+
+ `Int` `Type` the type of integers
+
+ `Ints` `Integer -> Type` the type of integers from 0 to n
+
+ `error` `Str -> Error` forms error message
+
+ `length` `Str -> Int` length of string
+
+ `drop` `Integer -> Str -> Str` drop prefix of length
+
+ `take` `Integer -> Str -> Str` take prefix of length
+
+ `tk` `Integer -> Str -> Str` drop suffix of length
+
+ `dp` `Integer -> Str -> Str` take suffix of length
+
+ `eqInt` `Integer -> Integer -> PBool` test if equal integers
+
+ `lessInt` `Integer -> Integer -> PBool` test order of integers
+
+ `plus` `Integer -> Integer -> Integer` add integers
+
+ `eqStr` `Str -> Str -> PBool` test if equal strings
+
+ `occur` `Str -> Str -> PBool` test if occurs as substring
+
+ `occurs` `Str -> Str -> PBool` test if any char occurs
+
+ `show` `(P : Type) -> P -> Str` convert param to string
+
+ `read` `(P : Type) -> Str -> P` convert string to param
+
+ `toStr` `(L : Type) -> L -> Str` find the \"first\" string
+
+ `nonExist` `Str` a special token marking\
+ non-existing morphological forms
+
+ `BIND` `Str` a special token marking\
+ that the surrounding tokens should not\
+ be separated by space
+
+ `SOFT_BIND` `Str` a special token marking\
+ that the surrounding tokens may not\
+ be separated by space
+
+ `SOFT_SPACE` `Str` a special token marking\
+ that the space between the surrounding tokens\
+ is optional
+
+ `CAPIT` `Str` a special token marking\
+ that the first character in the next token\
+ should be capitalized
+
+ `ALL_CAPIT` `Str` a special token marking\
+ that the next word should be\
+ in all capital letters
+ -------------------------------------------------------------------------------------------------
+
+Compilation eliminates these operations, and they may therefore not take
+arguments that depend on run-time variables.
+
+The module `Predef` is included in the *opens* list of all modules, and
+therefore does not need to be opened explicitly.
+
+
+Flags and pragmas
+-----------------
+
+
+### Some flags and their values
+
+[]{#flagvalues}
+
+The flag `coding` in concrete syntax sets the **character encoding**
+used in the grammar. Internally, GF uses unicode, and `.pgf` files are
+always written in UTF8 encoding. The presence of the flag `coding=utf8`
+prevents GF from encoding an already encoded file.
+
+The flag `startcat` in abstract syntax sets the default start category
+for parsing, random generation, and any other grammar operation that
+depends on category. Its legal values are the categories defined or
+inherited in the abstract syntax.
+
+
+### Compiler pragmas
+
+**Compiler pragmas** are a special form of comments prefixed with `--#`.
+Currently GF interprets the following pragmas.
+
+ pragma explanation
+ -------------- ---------------------------------
+ `-path=`PATH path list for searching modules
+
+For instance, the line
+
+ --# -path=.:present:prelude:/home/aarne/GF/tmp
+
+in the top of `FILE.gf` causes the GF compiler, when invoked on
+`FILE.gf`, to search through the current directory (`.`) and the
+directories `present`, `prelude`, and `/home/aarne/GF/tmp`, in this
+order. If a directory `DIR` is not found relative to the working
+directory, `$(GF_LIB_PATH)/DIR` is searched. `$GF_LIB_PATH` can be a
+colon-separated list of directories, in which case each directory in the
+list contributes to the search path expansion.
+
+
+Alternative grammar input formats
+---------------------------------
+
+While the GF language as specified in this document is the most
+versatile and powerful way of writing GF grammars, there are several
+other formats that a GF compiler may make available for users, either to
+get started with small grammars or to semiautomatically convert grammars
+from other formats to GF. Here are the ones supported by GF 2.8 and 3.0.
+
+
+### Old GF without modules
+
+[]{#oldgf}
+
+Before GF compiler version 2.0, there was no module system, and all
+kinds of judgement could be written in all files, without any headers.
+This format is still available, and the compiler (version 2.8) detects
+automatically if a file is in the current or the old format. However,
+the old format is not recommended because of poor modularity and missing
+separate compilation, and also because libraries are not available,
+since the old and the new format cannot be mixed. With version 2.8,
+grammars in the old format can be converted to modular grammar with the
+command
+
+ > import -o FILE.gf
+
+which rewrites the grammar divided into three files: an abstract, a
+concrete, and a resource module.
+
+
+### Context-free grammars
+
+A quick way to write a GF grammar is to use the context-free format,
+also known as BNF. Files of this form are recognized by the suffix
+`.cf`. Rules in these files have the form
+
+*Label* `.` *Cat* `::=` (*String* \| *Cat*)\* `;`
+
+where *Label* and *Cat* are identifiers and *String* quoted strings.
+
+There is a shortcut form generating labels automatically,
+
+*Cat* `::=` (*String* \| *Cat*)\* `;`
+
+In the shortcut form, vertical bars (`|`) can be used to give several
+right-hand-sides at a time. An empty right-hand side means the singleton
+of an empty sequence, and not an empty union.
+
+Just like old-style GF files (previous section), context-free grammar
+files can be converted to modular GF by using the `-o` option to the
+compiler in GF 2.8.
+
+
+### Extended BNF grammars
+
+Extended BNF (`FILE.ebnf`) goes one step further from the shortcut
+notation of previous section. The rules have the form
+
+*Cat* `::=` *RHS* `;`
+
+where an *RHS* can be any regular expression built from quoted strings
+and category symbols, in the following ways:
+
+ RHS item explanation
+ ----------------- ------------------------
+ *Cat* nonterminal
+ *String* terminal
+ *RHS* *RHS* sequence
+ *RHS* `|` *RHS* alternatives
+ *RHS* `?` optional
+ *RHS* `*` repetition
+ *RHS* `+` non-empty repetition
+
+Parentheses are used to override standard precedences, where `|` binds
+weaker than sequencing, which binds weaker than the unary operations.
+
+The compiler generates not only labels, but also new categories
+corresponding to the regular expression combinations actually in use.
+
+Just like `.cf` files (previous section), `.ebnf` files can be converted
+to modular GF by using the `-o` option to the compiler in GF 2.8.
+
+
+### Example-based grammars
+
+**Example-based grammars** (`.gfe`) provide a way to use resource
+grammar libraries without having to know the names of functions in them.
+The compiler works as a preprocessor, saving the result in a (`.gf`)
+file, which can be compiled as usual.
+
+If a library is implemented as an abstract and concrete syntax, it can
+be used for parsing. Calls of library functions can therefore be formed
+by parsing strings in the library. GF has an expression format for this,
+
+`in` *C* *String*
+
+where *C* is the category in which to parse (it can be qualified by the
+module name) and the string is the input to the parser. Expressions of this
+form are replaced by the syntax trees that result. These trees are
+always type-correct. If several parses are found, all but the first one
+are given in comments.
+
+Here is an example, from `GF/examples/animal/`:
+
+ --# -resource=../../lib/present/LangEng.gfc
+ --# -path=.:present:prelude
+
+ incomplete concrete QuestionsI of Questions = open Lang in {
+ lincat
+ Phrase = Phr ;
+ Entity = N ;
+ Action = V2 ;
+ lin
+ Who love_V2 man_N = in Phr "who loves men" ;
+ Whom man_N love_V2 = in Phr "whom does the man love" ;
+ Answer woman_N love_V2 man_N = in Phr "the woman loves men" ;
+ }
+
+The `resource` pragma shows the grammar that is used for parsing the
+examples.
+
+Notice that the variables `love_V2`, `man_N`, etc, are actually
+constants in the library. In the resulting rules, such as
+
+ lin Whom = \man_N -> \love_V2 ->
+ PhrUtt NoPConj (UttQS (UseQCl TPres ASimul PPos
+ (QuestSlash whoPl_IP (SlashV2 (DetCN (DetSg (SgQuant
+ DefArt)NoOrd)(UseN man_N)) love_V2)))) NoVoc ;
+
+those constants are nonetheless treated as variables, following the
+normal binding conventions, as stated [here](#renaming).
+
+
+The grammar of GF
+-----------------
+
+The following grammar is actually used in the parser of GF, although we
+have omitted some obsolete rules still included in the parser for
+backward compatibility reasons.
+
+This document was automatically generated by the *BNF-Converter*. It was
+generated together with the lexer, the parser, and the abstract syntax
+module, which guarantees that the document matches with the
+implementation of the language (provided no hand-hacking has taken
+place).
+
+
+The lexical structure of GF
+---------------------------
+
+
+### Identifiers
+
+Identifiers *Ident* are unquoted strings beginning with a letter,
+followed by any combination of letters, digits, and the characters `_ '`,
+reserved words excluded.
+
+
+### Literals
+
+Integer literals *Integer* are nonempty sequences of digits.
+
+String literals *String* have the form `"`*x*`"`, where *x* is any
+sequence of any characters except `"` unless preceded by `\`.
+
+Double-precision float literals *Double* have the structure indicated by
+the regular expression `digit+ '.' digit+ ('e' ('-')? digit+)?`, i.e.
+two sequences of digits separated by a decimal point, optionally
+followed by an unsigned or negative exponent.
+
+
+### Reserved words and symbols
+
+The set of reserved words is the set of terminals appearing in the
+grammar. Those reserved words that consist of non-letter characters are
+called symbols, and they are treated in a different way from those that
+are similar to identifiers. The lexer follows rules familiar from
+languages like Haskell, C, and Java, including longest match and spacing
+conventions.
+
+The reserved words used in GF are the following:
+
+- `PType`
+- `Str`
+- `Strs`
+- `Type`
+- `abstract`
+- `case`
+- `cat`
+- `concrete`
+- `data`
+- `def`
+- `flags`
+- `fun`
+- `in`
+- `incomplete`
+- `instance`
+- `interface`
+- `let`
+- `lin`
+- `lincat`
+- `lindef`
+- `linref`
+- `of`
+- `open`
+- `oper`
+- `param`
+- `pre`
+- `printname`
+- `resource`
+- `strs`
+- `table`
+- `transfer`
+- `variants`
+- `where`
+- `with`
+
+The symbols used in GF are the following:
+
+- `;`
+- `=`
+- `:`
+- `->`
+- `{`
+- `}`
+- `**`
+- `,`
+- `(`
+- `)`
+- `[`
+- `]`
+- `-`
+- `.`
+- `|`
+- `?`
+- `<`
+- `>`
+- `@`
+- `!`
+- `*`
+- `+`
+- `++`
+- `\`
+- `=>`
+- `_`
+- `$`
+- `/`
+
+### Comments
+
+Single-line comments begin with `--`.
+Multiple-line comments are enclosed with `{-` and `-}`.
+
+
+The syntactic structure of GF
+-----------------------------
+
+Terminals appear as `code`.
+The symbols **->** (production), **|** (union) and **eps** (empty rule) belong to the BNF notation.
+All other symbols are non-terminals.
+
+ --------------------- -------- ------------------------------------------------------------------------------------
+ *Grammar* **->** *\[ModDef\]*
+ *\[ModDef\]* **->** **eps**
+ **|** *ModDef* *\[ModDef\]*
+ *ModDef* **->** *ModDef* `;`
+ **|** *ComplMod* *ModType* `=` *ModBody*
+ *ModType* **->** `abstract` *Ident*
+ **|** `resource` *Ident*
+ **|** `interface` *Ident*
+ **|** `concrete` *Ident* `of` *Ident*
+ **|** `instance` *Ident* `of` *Ident*
+ **|** `transfer` *Ident* `:` *Open* `->` *Open*
+ *ModBody* **->** *Extend* *Opens* `{` *\[TopDef\]* `}`
+ **|** *\[Included\]*
+ **|** *Included* `with` *\[Open\]*
+ **|** *Included* `with` *\[Open\]* `**` *Opens* `{` *\[TopDef\]* `}`
+ **|** *\[Included\]* `**` *Included* `with` *\[Open\]*
+ **|** *\[Included\]* `**` *Included* `with` *\[Open\]* `**` *Opens* `{` *\[TopDef\]* `}`
+ *\[TopDef\]* **->** **eps**
+ **|** *TopDef* *\[TopDef\]*
+ *Extend* **->** *\[Included\]* `**`
+ **|** **eps**
+ *\[Open\]* **->** **eps**
+ **|** *Open*
+ **|** *Open* `,` *\[Open\]*
+ *Opens* **->** **eps**
+ **|** `open` *\[Open\]* `in`
+ *Open* **->** *Ident*
+ **|** `(` *QualOpen* *Ident* `)`
+ **|** `(` *QualOpen* *Ident* `=` *Ident* `)`
+ *ComplMod* **->** **eps**
+ **|** `incomplete`
+ *QualOpen* **->** **eps**
+ *\[Included\]* **->** **eps**
+ **|** *Included*
+ **|** *Included* `,` *\[Included\]*
+ *Included* **->** *Ident*
+ **|** *Ident* `[` *\[Ident\]* `]`
+ **|** *Ident* `-` `[` *\[Ident\]* `]`
+ *Def* **->** *\[Name\]* `:` *Exp*
+ **|** *\[Name\]* `=` *Exp*
+ **|** *Name* *\[Patt\]* `=` *Exp*
+ **|** *\[Name\]* `:` *Exp* `=` *Exp*
+ *TopDef* **->** `cat` *\[CatDef\]*
+ **|** `fun` *\[FunDef\]*
+ **|** `data` *\[FunDef\]*
+ **|** `def` *\[Def\]*
+ **|** `data` *\[DataDef\]*
+ **|** `param` *\[ParDef\]*
+ **|** `oper` *\[Def\]*
+ **|** `lincat` *\[PrintDef\]*
+ **|** `lindef` *\[Def\]*
+ **|** `linref` *\[Def\]*
+ **|** `lin` *\[Def\]*
+ **|** `printname` `cat` *\[PrintDef\]*
+ **|** `printname` `fun` *\[PrintDef\]*
+ **|** `flags` *\[FlagDef\]*
+ *CatDef* **->** *Ident* *\[DDecl\]*
+ **|** `[` *Ident* *\[DDecl\]* `]`
+ **|** `[` *Ident* *\[DDecl\]* `]` `{` *Integer* `}`
+ *FunDef* **->** *\[Ident\]* `:` *Exp*
+ *DataDef* **->** *Ident* `=` *\[DataConstr\]*
+ *DataConstr* **->** *Ident*
+ **|** *Ident* `.` *Ident*
+ *\[DataConstr\]* **->** **eps**
+ **|** *DataConstr*
+ **|** *DataConstr* `|` *\[DataConstr\]*
+ *ParDef* **->** *Ident* `=` *\[ParConstr\]*
+ **|** *Ident* `=` `(` `in` *Ident* `)`
+ **|** *Ident*
+ *ParConstr* **->** *Ident* *\[DDecl\]*
+ *PrintDef* **->** *\[Name\]* `=` *Exp*
+ *FlagDef* **->** *Ident* `=` *Ident*
+ *\[Def\]* **->** *Def* `;`
+ **|** *Def* `;` *\[Def\]*
+ *\[CatDef\]* **->** *CatDef* `;`
+ **|** *CatDef* `;` *\[CatDef\]*
+ *\[FunDef\]* **->** *FunDef* `;`
+ **|** *FunDef* `;` *\[FunDef\]*
+ *\[DataDef\]* **->** *DataDef* `;`
+ **|** *DataDef* `;` *\[DataDef\]*
+ *\[ParDef\]* **->** *ParDef* `;`
+ **|** *ParDef* `;` *\[ParDef\]*
+ *\[PrintDef\]* **->** *PrintDef* `;`
+ **|** *PrintDef* `;` *\[PrintDef\]*
+ *\[FlagDef\]* **->** *FlagDef* `;`
+ **|** *FlagDef* `;` *\[FlagDef\]*
+ *\[ParConstr\]* **->** **eps**
+ **|** *ParConstr*
+ **|** *ParConstr* `|` *\[ParConstr\]*
+ *\[Ident\]* **->** *Ident*
+ **|** *Ident* `,` *\[Ident\]*
+ *Name* **->** *Ident*
+ **|** `[` *Ident* `]`
+ *\[Name\]* **->** *Name*
+ **|** *Name* `,` *\[Name\]*
+ *LocDef* **->** *\[Ident\]* `:` *Exp*
+ **|** *\[Ident\]* `=` *Exp*
+ **|** *\[Ident\]* `:` *Exp* `=` *Exp*
+ *\[LocDef\]* **->** **eps**
+ **|** *LocDef*
+ **|** *LocDef* `;` *\[LocDef\]*
+ *Exp6* **->** *Ident*
+ **|** *Sort*
+ **|** *String*
+ **|** *Integer*
+ **|** *Double*
+ **|** `?`
+ **|** `[` `]`
+ **|** `data`
+ **|** `[` *Ident* *Exps* `]`
+ **|** `[` *String* `]`
+ **|** `{` *\[LocDef\]* `}`
+ **|** `<` *\[TupleComp\]* `>`
+ **|** `<` *Exp* `:` *Exp* `>`
+ **|** `(` *Exp* `)`
+ *Exp5* **->** *Exp5* `.` *Label*
+ **|** *Exp6*
+ *Exp4* **->** *Exp4* *Exp5*
+ **|** `table` `{` *\[Case\]* `}`
+ **|** `table` *Exp6* `{` *\[Case\]* `}`
+ **|** `table` *Exp6* `[` *\[Exp\]* `]`
+ **|** `case` *Exp* `of` `{` *\[Case\]* `}`
+ **|** `variants` `{` *\[Exp\]* `}`
+ **|** `pre` `{` *Exp* `;` *\[Altern\]* `}`
+ **|** `strs` `{` *\[Exp\]* `}`
+ **|** *Ident* `@` *Exp6*
+ **|** *Exp5*
+ *Exp3* **->** *Exp3* `!` *Exp4*
+ **|** *Exp3* `*` *Exp4*
+ **|** *Exp3* `**` *Exp4*
+ **|** *Exp4*
+ *Exp1* **->** *Exp2* `+` *Exp1*
+ **|** *Exp2*
+ *Exp* **->** *Exp1* `++` *Exp*
+ **|** `\` *\[Bind\]* `->` *Exp*
+ **|** `\` `\` *\[Bind\]* `=>` *Exp*
+ **|** *Decl* `->` *Exp*
+ **|** *Exp3* `=>` *Exp*
+ **|** `let` `{` *\[LocDef\]* `}` `in` *Exp*
+ **|** `let` *\[LocDef\]* `in` *Exp*
+ **|** *Exp3* `where` `{` *\[LocDef\]* `}`
+ **|** `in` *Exp5* *String*
+ **|** *Exp1*
+ *Exp2* **->** *Exp3*
+ *\[Exp\]* **->** **eps**
+ **|** *Exp*
+ **|** *Exp* `;` *\[Exp\]*
+ *Exps* **->** **eps**
+ **|** *Exp6* *Exps*
+ *Patt2* **->** `_`
+ **|** *Ident*
+ **|** *Ident* `.` *Ident*
+ **|** *Integer*
+ **|** *Double*
+ **|** *String*
+ **|** `{` *\[PattAss\]* `}`
+ **|** `<` *\[PattTupleComp\]* `>`
+ **|** `(` *Patt* `)`
+ *Patt1* **->** *Ident* *\[Patt\]*
+ **|** *Ident* `.` *Ident* *\[Patt\]*
+ **|** *Patt2* `*`
+ **|** *Ident* `@` *Patt2*
+ **|** `-` *Patt2*
+ **|** *Patt2*
+ *Patt* **->** *Patt* `|` *Patt1*
+ **|** *Patt* `+` *Patt1*
+ **|** *Patt1*
+ *PattAss* **->** *\[Ident\]* `=` *Patt*
+ *Label* **->** *Ident*
+ **|** `$` *Integer*
+ *Sort* **->** `Type`
+ **|** `PType`
+ **|** `Str`
+ **|** `Strs`
+ *\[PattAss\]* **->** **eps**
+ **|** *PattAss*
+ **|** *PattAss* `;` *\[PattAss\]*
+ *\[Patt\]* **->** *Patt2*
+ **|** *Patt2* *\[Patt\]*
+ *Bind* **->** *Ident*
+ **|** `_`
+ *\[Bind\]* **->** **eps**
+ **|** *Bind*
+ **|** *Bind* `,` *\[Bind\]*
+ *Decl* **->** `(` *\[Bind\]* `:` *Exp* `)`
+ **|** *Exp4*
+ *TupleComp* **->** *Exp*
+ *PattTupleComp* **->** *Patt*
+ *\[TupleComp\]* **->** **eps**
+ **|** *TupleComp*
+ **|** *TupleComp* `,` *\[TupleComp\]*
+ *\[PattTupleComp\]* **->** **eps**
+ **|** *PattTupleComp*
+ **|** *PattTupleComp* `,` *\[PattTupleComp\]*
+ *Case* **->** *Patt* `=>` *Exp*
+ *\[Case\]* **->** *Case*
+ **|** *Case* `;` *\[Case\]*
+ *Altern* **->** *Exp* `/` *Exp*
+ *\[Altern\]* **->** **eps**
+ **|** *Altern*
+ **|** *Altern* `;` *\[Altern\]*
+ *DDecl* **->** `(` *\[Bind\]* `:` *Exp* `)`
+ **|** *Exp6*
+ *\[DDecl\]* **->** **eps**
+ **|** *DDecl* *\[DDecl\]*
+ --------------------- -------- ------------------------------------------------------------------------------------
diff --git a/doc/gf-shell-reference.t2t b/doc/gf-shell-reference.t2t
index 1ad915d41..bfe3f43ca 100644
--- a/doc/gf-shell-reference.t2t
+++ b/doc/gf-shell-reference.t2t
@@ -1,11 +1,8 @@
The GF Software System
-%!style:../css/style.css
%!options(html): --toc
%!options(html): --toc-level=4
-%!postproc(html):
-%!postproc(html):
%!postproc(html): "#VSPACE" ""
%!postproc(html): "#NORMAL" ""
%!postproc(html): "#TINY" ""
@@ -13,7 +10,7 @@ The GF Software System
The GF software system implements the GF programming language. Its
components are
-- the //compiler//,
+- the //compiler//,
translating ``.gf`` source files to ``.gfo`` object files, to
``.pgf`` run-time grammars, and to various other formats
- the //run-time system//,
@@ -43,7 +40,7 @@ The shell maintains a //state//, to which belong
Unless file arguments are provided to the ``gf`` command, the shell starts in an
-empty state, with no grammars and no history.
+empty state, with no grammars and no history.
In the shell, a set of commands
is available. Some of these commands may change the grammars in the state. The general
@@ -59,7 +56,7 @@ syntax of commands is given by the following BNF grammar:
ARGUMENT ::= QUOTED_STRING | TREE
VALUE ::= IDENT | QUOTED_STRING
```
-A command pipe is a sequence of commands interpreted in such a way
+A command pipe is a sequence of commands interpreted in such a way
that the output of each command
is send as input to the next. The option ``-tr`` causes GF to show a trace,
i.e. the intermediate result of the command to which it is attached.
@@ -69,7 +66,7 @@ executed one by one, in the order of appearance.
===GF shell commands===
-The full set of GF shell commands is listed below with explanations.
+The full set of GF shell commands is listed below with explanations.
This list can also be obtained in the GF shell by the command ``help -full``.
%!include: gf-help-full.txt
@@ -77,14 +74,14 @@ This list can also be obtained in the GF shell by the command ``help -full``.
==The GF batch compiler==
With the option ``-batch``, GF can be invoked in batch mode, i.e.
-without opening the shell, to compile files from ``.gf`` to ``.gfo``.
-The ``-s`` option ("silent") eliminates all messages except errors.
+without opening the shell, to compile files from ``.gf`` to ``.gfo``.
+The ``-s`` option ("silent") eliminates all messages except errors.
```
$ gf -batch -s LangIta.gf
```
With the option ``-make``, and as a set of
top-level grammar files (with the same abstract syntax) as arguments,
-GF produces a ``.pgf`` file. The flag ``-optimize-pgf`` minimizes
+GF produces a ``.pgf`` file. The flag ``-optimize-pgf`` minimizes
the size of the ``.pgf`` file, and is recommended for grammars to be shipped.
```
$ gf -make -optimize-pgf LangIta.gf LangEng.gf LangGer.gf
@@ -107,5 +104,3 @@ To run GF from a //script//, redirection of standard input can be used:
```
The file ``script.gfs`` should then contain a sequence of GF commands, one per line.
Unrecognized command lines are skipped without terminating GF.
-
-
diff --git a/doc/index.html b/doc/index.html
deleted file mode 100644
index 3d2cda8dc..000000000
--- a/doc/index.html
+++ /dev/null
@@ -1,69 +0,0 @@
-
-
-
-GF Documentation
-
-
-
-
-
-
-
- GF Quick Reference. Also available in
- pdf. Covers all features of GF language
- in a summary format.
-
-
- GF Reference Manual. A full-scale reference
- manual of the GF language.
-
-
- GF Shell Reference.
- Describes the commands available in the interactive GF shell. Also
- summarizes how to run GF as a batch compiler.
-
-
- Editor modes for GF.
- Editor modes for GF provides syntax highligting, automatic indentation and
- other features that makes editing GF grammar files easier.
-
-
-
-
-
Publications
-
-
-Bibliography: more publications on GF, as well as background literature.
-
-
-
diff --git a/doc/index.md b/doc/index.md
new file mode 100644
index 000000000..39d2c3036
--- /dev/null
+++ b/doc/index.md
@@ -0,0 +1,13 @@
+---
+title: Grammatical Framework Documentation
+---
+
+Perhaps you're looking for one of the following:
+
+- [Tutorial](tutorial/gf-tutorial.html). This is a hands-on introduction to grammar writing in GF.
+- [Reference Manual](gf-refman.html). A full-scale reference manual of the GF language.
+- [RGL Tutorial](../lib/doc/rgl-tutorial/index.html)
+- [RGL Synopsis](../lib/doc/synopsis/index.html). Documentation of the Resource Grammar Library, including the syntax API and lexical paradigms for each language.
+- [Shell Reference](gf-shell-reference.html). Describes the commands available in the interactive GF shell.
+ Also summarizes how to run GF as a batch compiler.
+- [Developers Guide](gf-developers.html). Detailed information about building and developing GF.
diff --git a/doc/runtime-api.html b/doc/runtime-api.html
index 4ed05c3d3..2bc74c409 100644
--- a/doc/runtime-api.html
+++ b/doc/runtime-api.html
@@ -1,29 +1,26 @@
-
+
+
+ C Runtime API
+
+
+
-
-
-
Using the PythonHaskellJavaC# binding to the C runtime
Before you use the Python binding you need to import the PGF2 modulepgf modulepgf packagePGFSharp package:
@@ -127,7 +138,7 @@ Concr eng = gr.Languages["AppEng"];
Parsing
-All language specific services are available as
+All language specific services are available as
methods of the class pgf.Concrfunctions that take as an argument an object of type Concrmethods of the class Concrmethods of the class Concr.
For example to invoke the parser, you can call:
Note that depending on the grammar it is absolutely possible that for
-a single sentence you might get infinitely many trees.
+
Note that depending on the grammar it is absolutely possible that for
+a single sentence you might get infinitely many trees.
In other cases the number of trees might be finite but still enormous.
-The parser is specifically designed to be lazy, which means that
+The parser is specifically designed to be lazy, which means that
each tree is returned as soon as it is found before exhausting
the full search space. For grammars with a patological number of
trees it is advisable to pick only the top N trees
@@ -246,16 +257,16 @@ parsing with a different start category can be done as follows:
-There is also the function parseWithHeuristics which
-takes two more paramaters which let you to have a better control
+There is also the function parseWithHeuristics which
+takes two more parameters which let you have better control
over the parser's behaviour:
Prelude PGF2> let res = parseWithHeuristics eng (startCat gr) heuristic_factor callbacks
-There is also the method parseWithHeuristics which
-takes two more paramaters which let you to have a better control
+There is also the method parseWithHeuristics which
+takes two more parameters which let you have better control
over the parser's behaviour:
Iterable<ExprProb> iterable = eng.parseWithHeuristics(gr.startCat(), heuristic_factor, callbacks);
@@ -281,7 +292,7 @@ to factor 0.0. When we increase the factor then parsing becomes faster
but at the same time the sorting becomes imprecise. The worst
factor is 1.0. In any case the parser always returns the same set of
trees but in different order. Our experience is that even a factor
-of about 0.6-0.8 with the translation grammar still orders
+of about 0.6-0.8 with the translation grammar still orders
the most probable tree on top of the list but further down the list,
the trees become shuffled.
@@ -457,7 +468,7 @@ the object has the following public final variables:
-The linearization works even if there are functions in the tree
+The linearization works even if there are functions in the tree
that doesn't have linearization definitions. In that case you
will just see the name of the function in the generated string.
It is sometimes helpful to be able to see whether a function
@@ -483,7 +494,7 @@ true
An already constructed tree can be analyzed and transformed
-in the host application. For example you can deconstruct
+in the host application. For example you can deconstruct
a tree into a function name and a list of arguments:
>>> e.unpack()
@@ -523,8 +534,8 @@ literal. For example the result from:
The result from unApp is Just if the expression
is an application and Nothing in all other cases.
-Similarly, if the tree is a literal string then the return value
-from unStr will be Just with the actual literal.
+Similarly, if the tree is a literal string then the return value
+from unStr will be Just with the actual literal.
For example the result from:
@@ -534,8 +545,8 @@ Prelude PGF2> readExpr "\"literal\"" >>= unStr
The result from unApp is not null if the expression
is an application, and null in all other cases.
-Similarly, if the tree is a literal string then the return value
-from unStr will not be null with the actual literal.
+Similarly, if the tree is a literal string then the return value
+from unStr will not be null with the actual literal.
For example the output from:
@@ -545,15 +556,15 @@ System.out.println(elit.unStr());
The result from UnApp is not null if the expression
is an application, and null in all other cases.
-Similarly, if the tree is a literal string then the return value
-from UnStr will not be null with the actual literal.
+Similarly, if the tree is a literal string then the return value
+from UnStr will not be null with the actual literal.
For example the output from:
-is just the string "literal".
+is just the string "literal".
Situations like this can be detected
in Python by checking the type of the result from unpack.
It is also possible to get an integer or a floating point number
@@ -569,7 +580,7 @@ There are also the methods UnAbs, UnInt, UnFloat and
-Constructing new trees is also easy. You can either use
+Constructing new trees is also easy. You can either use
readExpr to read trees from strings, or you can
construct new trees from existing pieces. This is possible by
@@ -612,7 +623,7 @@ Console.WriteLine(e2);
If the host application needs to do a lot of expression manipulations,
then it is helpful to use a higher-level API to the grammar,
also known as "embedded grammars" in GF. The advantage is that
-you can construct and analyze expressions in a more compact way.
+you can construct and analyze expressions in a more compact way.
In Python you first have to embed the grammar by calling:
@@ -721,7 +732,7 @@ call the method default. The following is an example:
def on_DetCN(self,quant,cn):
print("Found DetCN")
cn.visit(self)
-
+
def on_AdjCN(self,adj,cn):
print("Found AdjCN")
cn.visit(self)
@@ -1007,7 +1018,7 @@ Traceback (most recent call last):
pgf.PGFError: The concrete syntax is not loaded
-Before using the concrete syntax, you need to explicitly load it:
+Before using the concrete syntax, you need to explicitly load it:
>>> eng.load("AppEng.pgf_c")
>>> print(eng.lookupMorpho("letter"))
@@ -1060,7 +1071,7 @@ Traceback (most recent call last):
pgf.PGFError: The concrete syntax is not loaded
-Before using the concrete syntax, you need to explicitly load it:
+Before using the concrete syntax, you need to explicitly load it:
eng.load("AppEng.pgf_c")
for (MorphoAnalysis an : eng.lookupMorpho("letter")) {
@@ -1289,6 +1300,7 @@ graph {
}
+
+
-
diff --git a/doc/tutorial/gf-tutorial.t2t b/doc/tutorial/gf-tutorial.t2t
index 2e3f086f7..1af346a17 100644
--- a/doc/tutorial/gf-tutorial.t2t
+++ b/doc/tutorial/gf-tutorial.t2t
@@ -8,7 +8,7 @@ December 2010 for GF 3.2
% txt2tags --toc -ttex gf-tutorial.txt
%!target:html
-%!encoding: iso-8859-1
+%!encoding: utf-8
%!options: --toc
%!postproc(tex) : "\\subsection\*" "\\newslide"
@@ -618,32 +618,32 @@ and **semantic definitions**.
-#NEW
-
-==Slides==
-
-You can chop this tutorial into a set of slides by the command
-```
- htmls gf-tutorial.html
-```
-where the program ``htmls`` is distributed with GF (see below), in
-
- [``GF/src/tools/Htmls.hs`` http://grammaticalframework.org/src/tools/Htmls.hs]
-
-The slides will appear as a set of files beginning with ``01-gf-tutorial.htmls``.
-
-Internal links will not work in the slide format, except for those in the
-upper left corner of each slide, and the links behind the "Contents" link.
+% #NEW
+%
+% ==Slides==
+%
+% You can chop this tutorial into a set of slides by the command
+% ```
+% htmls gf-tutorial.html
+% ```
+% where the program ``htmls`` is distributed with GF (see below), in
+%
+% [``GF/src/tools/Htmls.hs`` http://grammaticalframework.org/src/tools/Htmls.hs]
+%
+% The slides will appear as a set of files beginning with ``01-gf-tutorial.htmls``.
+%
+% Internal links will not work in the slide format, except for those in the
+% upper left corner of each slide, and the links behind the "Contents" link.
#NEW
+#Lchaptwo
+
=Lesson 1: Getting Started with GF=
-#Lchaptwo
-
Goals:
- install and run GF
- write the first GF grammar: a "Hello World" grammar in three languages
@@ -836,8 +836,8 @@ Finnish and an Italian concrete syntaxes:
lin
Hello recip = {s = "terve" ++ recip.s} ;
World = {s = "maailma"} ;
- Mum = {s = "iti"} ;
- Friends = {s = "ystvt"} ;
+ Mum = {s = "äiti"} ;
+ Friends = {s = "ystävät"} ;
}
concrete HelloIta of Hello = {
@@ -925,7 +925,7 @@ Default of the language flag (``-lang``): the last-imported concrete syntax.
**Multilingual generation**:
```
> parse -lang=HelloEng "hello friends" | linearize
- terve ystvt
+ terve ystävät
ciao amici
hello friends
```
@@ -1037,9 +1037,10 @@ Application programs, using techniques from #Rchapeight:
#NEW
+#Lchapthree
+
=Lesson 2: Designing a grammar for complex phrases=
-#Lchapthree
Goals:
- build a larger grammar: phrases about food in English and Italian
@@ -1335,7 +1336,7 @@ Just (?) replace English words with their dictionary equivalents:
Phrase, Item, Kind, Quality = {s : Str} ;
lin
- Is item quality = {s = item.s ++ "" ++ quality.s} ;
+ Is item quality = {s = item.s ++ "è" ++ quality.s} ;
This kind = {s = "questo" ++ kind.s} ;
That kind = {s = "quel" ++ kind.s} ;
QKind quality kind = {s = kind.s ++ quality.s} ;
@@ -1446,11 +1447,11 @@ linearizations in different languages:
> gr -number=2 | l -treebank
Is (That Cheese) (Very Boring)
- quel formaggio molto noioso
+ quel formaggio è molto noioso
that cheese is very boring
Is (That Cheese) Fresh
- quel formaggio fresco
+ quel formaggio è fresco
that cheese is fresh
```
@@ -1472,14 +1473,14 @@ answer given in another language.
You can interrupt the quiz by entering a line consisting of a dot ('.').
this fish is warm
- questo pesce caldo
+ questo pesce è caldo
> Yes.
Score 1/1
this cheese is Italian
- questo formaggio noioso
- > No, not questo formaggio noioso, but
- questo formaggio italiano
+ questo formaggio è noioso
+ > No, not questo formaggio è noioso, but
+ questo formaggio è italiano
Score 1/2
this fish is expensive
@@ -1756,7 +1757,7 @@ Simultaneous extension and opening:
lincat
Question = SS ;
lin
- QIs item quality = ss (item.s ++ "" ++ quality.s) ;
+ QIs item quality = ss (item.s ++ "è" ++ quality.s) ;
Pizza = ss "pizza" ;
}
```
@@ -1797,9 +1798,10 @@ where
#NEW
+#Lchapfour
+
=Lesson 3: Grammars with parameters=
-#Lchapfour
Goals:
- implement sophisticated linguistic structures:
@@ -2364,10 +2366,10 @@ in English, with special care taken of variations with the suffix
+ Implement the German **Umlaut** operation on word stems.
The operation changes the vowel of the stressed stem syllable as follows:
-//a// to ////, //au// to //u//, //o// to ////, and //u// to ////. You
+//a// to //ä//, //au// to //äu//, //o// to //ö//, and //u// to //ü//. You
can assume that the operation only takes syllables as arguments. Test the
-operation to see whether it correctly changes //Arzt// to //rzt//,
-//Baum// to //Bum//, //Topf// to //Tpf//, and //Kuh// to //Kh//.
+operation to see whether it correctly changes //Arzt// to //Ärzt//,
+//Baum// to //Bäum//, //Topf// to //Töpf//, and //Kuh// to //Küh//.
@@ -2480,10 +2482,10 @@ The command ``morpho_quiz = mq`` generates inflection exercises.
Welcome to GF Morphology Quiz.
...
- rapparatre : VFin VCondit Pl P2
- rapparaitriez
- > No, not rapparaitriez, but
- rapparatriez
+ réapparaître : VFin VCondit Pl P2
+ réapparaitriez
+ > No, not réapparaitriez, but
+ réapparaîtriez
Score 0/1
```
To create a list for later use, use the command ``morpho_list = ml``
@@ -2563,7 +2565,7 @@ We need only number variation for the copula.
```
copula : Number -> Str =
\n -> case n of {
- Sg => "" ;
+ Sg => "è" ;
Pl => "sono"
} ;
```
@@ -2772,9 +2774,10 @@ Thus
#NEW
+#Lchapfive
+
=Lesson 4: Using the resource grammar library=
-#Lchapfive
Goals:
- navigate in the GF resource grammar library and use it in applications
@@ -3305,13 +3308,13 @@ we can write a **functor instantiation**,
oper
wine_N = mkN "Wein" ;
pizza_N = mkN "Pizza" "Pizzen" feminine ;
- cheese_N = mkN "Kse" "Ksen" masculine ;
+ cheese_N = mkN "Käse" "Käsen" masculine ;
fish_N = mkN "Fisch" ;
fresh_A = mkA "frisch" ;
- warm_A = mkA "warm" "wrmer" "wrmste" ;
+ warm_A = mkA "warm" "wärmer" "wärmste" ;
italian_A = mkA "italienisch" ;
expensive_A = mkA "teuer" ;
- delicious_A = mkA "kstlich" ;
+ delicious_A = mkA "köstlich" ;
boring_A = mkA "langweilig" ;
}
```
@@ -3362,11 +3365,11 @@ Lexicon instance
cheese_N = mkN "juusto" ;
fish_N = mkN "kala" ;
fresh_A = mkA "tuore" ;
- warm_A = mkA "lmmin" ;
+ warm_A = mkA "lämmin" ;
italian_A = mkA "italialainen" ;
expensive_A = mkA "kallis" ;
delicious_A = mkA "herkullinen" ;
- boring_A = mkA "tyls" ;
+ boring_A = mkA "tylsä" ;
}
```
Functor instantiation
@@ -3614,9 +3617,10 @@ tenses and moods, e.g. the Romance languages.
#NEW
+#Lchapsix
+
=Lesson 5: Refining semantics in abstract syntax=
-#Lchapsix
Goals:
- include semantic conditions in grammars, by using
@@ -3626,7 +3630,7 @@ Goals:
- semantic definitions
These concepts are inherited from **type theory** (more precisely:
-constructive type theory, or Martin-Lf type theory).
+constructive type theory, or Martin-Löf type theory).
Type theory is the basis **logical frameworks**.
@@ -4177,11 +4181,11 @@ Type checking can be invoked with ``put_term -transform=solve``.
#NEW
+#Lchapseven
+
==Lesson 6: Grammars of formal languages==
-#Lchapseven
-
Goals:
- write grammars for formal languages (mathematical notation, programming languages)
- interface between formal and natural langauges
@@ -4516,9 +4520,10 @@ point literals as arguments.
#NEW
+#Lchapeight
+
=Lesson 7: Embedded grammars=
-#Lchapeight
Goals:
- use grammars as parts of programs written in Haskell and JavaScript
@@ -4639,7 +4644,7 @@ output. Therefore it can be a part of a pipe and read and write files.
The simplest way to translate is to ``echo`` input to the program:
```
% echo "this wine is delicious" | ./trans Food.pgf
- questo vino delizioso
+ questo vino è delizioso
```
The result is given in all languages except the input language.
@@ -4958,12 +4963,12 @@ syntax name. This file contains the multilingual grammar as a JavaScript object.
===Using the JavaScript grammar===
To perform parsing and linearization, the run-time library
-``gflib.js`` is used. It is included in ``GF/lib/javascript/``, together with
+``gflib.js`` is used. It is included in ``/src/runtime/javascript/``, together with
some other JavaScript and HTML files; these files can be used
as templates for building applications.
An example of usage is
-[``translator.html`` http://grammaticalframework.org:41296],
+[``translator.html`` ../../src/runtime/javascript/translator.html],
which is in fact initialized with
a pointer to the Food grammar, so that it provides translation between the English
and Italian grammars:
diff --git a/doc/txt2html.sh b/doc/txt2html.sh
deleted file mode 100644
index 801541e95..000000000
--- a/doc/txt2html.sh
+++ /dev/null
@@ -1,13 +0,0 @@
-#!/bin/sh
-
-FILES="darcs.txt transfer-reference.txt transfer-tutorial.txt \
- transfer.txt"
-
-for f in $FILES; do
- h=`basename "$f" ".txt"`.html
- if [ "$f" -nt "$h" ]; then
- txt2tags $f
- else
- echo "$h is newer than $f, skipping"
- fi
-done
diff --git a/download/encoding-change.t2t b/download/encoding-change.t2t
index b5d7d4059..0a56977de 100644
--- a/download/encoding-change.t2t
+++ b/download/encoding-change.t2t
@@ -1,10 +1,6 @@
GF character encoding changes
Thomas Hallgren
-%%mtime(%F)
-
-%!style:../css/style.css
-%!postproc(html):
-%!postproc(html):
+2013-12-18
==Changes to character encodings in GF grammar files ==
diff --git a/download/index-3.1.6.t2t b/download/index-3.1.6.t2t
index ee2ebbc85..2c04291ee 100644
--- a/download/index-3.1.6.t2t
+++ b/download/index-3.1.6.t2t
@@ -16,7 +16,7 @@ GF 3.1.6 released 23 April 2010.
- Windows (zipped executable):
[``gf-3.1.6-bin-i486-windows.zip`` gf-3.1.6-bin-i486-windows.zip]
(1.6 MB)
-- Ubuntu Linux (gzipped executable):
+- Ubuntu Linux (gzipped executable):
[``gf-3.1.6-bin-i486-linux.gz`` gf-3.1.6-bin-i486-linux.gz]
(1.7 MB)
- compiled library package:
@@ -25,7 +25,7 @@ GF 3.1.6 released 23 April 2010.
- full source package (GF system, libraries, examples, documentation):
[``gf-3.1.6-src.tar.gz`` gf-3.1.6-src.tar.gz]
(11 MB)
-
+
GF is also on [Hackage http://hackage.haskell.org/package/gf]
@@ -40,7 +40,7 @@ What's new? See the [release notes release-3.1.6.html].
==Installation instructions==
-The Windows package is installed by just unpacking it anywhere.
+The Windows package is installed by just unpacking it anywhere.
It finds the libraries relative to the ``.exe`` file.
To install a binary package for MacOS X or Linux: uncompress the executable and
diff --git a/download/index-3.2.9.t2t b/download/index-3.2.9.t2t
index f43ca30b0..61fa11a45 100644
--- a/download/index-3.2.9.t2t
+++ b/download/index-3.2.9.t2t
@@ -1,11 +1,6 @@
Grammatical Framework Download and Installation
-%!style:../css/style.css
-%!postproc(html):
-%!postproc(html):
-%!postproc(html):
-
**GF 3.2.9** source-only snapshot was released on 12 September 2011.
What's new? Faster grammar compilation!
@@ -77,9 +72,3 @@ The above notes for installing from source apply also in this case.
- [GF 3.2 index-3.2.html] (December 2011).
- [GF 3.1.6 index-3.1.6.html] (April 2010).
- [GF 3.1 old-index.html] (December 2009).
-
-
---------------------
-
-[www.grammaticalframework.org http://www.grammaticalframework.org]
-
diff --git a/download/index-3.2.t2t b/download/index-3.2.t2t
index c71cbcbb8..a1ffc8a0f 100644
--- a/download/index-3.2.t2t
+++ b/download/index-3.2.t2t
@@ -1,11 +1,6 @@
Grammatical Framework Download and Installation
-%!style:../css/style.css
-%!postproc(html):
-%!postproc(html):
-%!postproc(html):
-
**GF 3.2** was released on 23 December 2010.
What's new? See the [Release notes release-3.2.html].
@@ -27,7 +22,7 @@ More packages might be added later.
===Notes===
-The Windows package is installed by just unpacking it anywhere.
+The Windows package is installed by just unpacking it anywhere.
It finds the libraries relative to the ``.exe`` file.
The ``.deb`` packages work on Ubuntu 10.04 and 10.10.
@@ -105,8 +100,3 @@ Subsequently:
```
The above notes for installing from source apply also in this case.
-
---------------------
-
-[www.grammaticalframework.org http://www.grammaticalframework.org]
-
diff --git a/download/index-3.3.3.t2t b/download/index-3.3.3.t2t
index 071041676..dfc77f84c 100644
--- a/download/index-3.3.3.t2t
+++ b/download/index-3.3.3.t2t
@@ -1,12 +1,6 @@
Grammatical Framework Download and Installation
-%!style:../css/style.css
-%!postproc(html):
-%!postproc(html):
-%!postproc(html):
-%!postproc(html):
-
**GF 3.3.3** was released on 3 March 2012.
What's new? See the [Release notes release-3.3.3.html].
@@ -24,7 +18,7 @@ What's new? See the [Release notes release-3.3.3.html].
===Notes===
-The Windows package is installed by just unpacking it anywhere.
+The Windows package is installed by just unpacking it anywhere.
It finds the libraries relative to the ``.exe`` file.
%The ``.deb`` packages work on Ubuntu 10.04 and 10.10.
@@ -127,9 +121,3 @@ For more info, see the [GF Developers Guide ../doc/gf-developers.html].
- [GF 3.2 index-3.2.html] (December 2010).
- [GF 3.1.6 index-3.1.6.html] (April 2010).
- [GF 3.1 old-index.html] (December 2009).
-
-
---------------------
-
-[www.grammaticalframework.org http://www.grammaticalframework.org]
-
diff --git a/download/index-3.3.t2t b/download/index-3.3.t2t
index 5c6cdb053..1f04d407a 100644
--- a/download/index-3.3.t2t
+++ b/download/index-3.3.t2t
@@ -1,11 +1,6 @@
Grammatical Framework Download and Installation
-%!style:../css/style.css
-%!postproc(html):
-%!postproc(html):
-%!postproc(html):
-
**GF 3.3** was released on 27 October 2011.
What's new? See the [Release notes release-3.3.html].
@@ -27,7 +22,7 @@ More packages might be added later.
===Notes===
-The Windows package is installed by just unpacking it anywhere.
+The Windows package is installed by just unpacking it anywhere.
It finds the libraries relative to the ``.exe`` file.
%The ``.deb`` packages work on Ubuntu 10.04 and 10.10.
@@ -115,9 +110,3 @@ The above notes for installing from source apply also in this case.
- [GF 3.2 index-3.2.html] (December 2010).
- [GF 3.1.6 index-3.1.6.html] (April 2010).
- [GF 3.1 old-index.html] (December 2009).
-
-
---------------------
-
-[www.grammaticalframework.org http://www.grammaticalframework.org]
-
diff --git a/download/index-3.4.t2t b/download/index-3.4.t2t
index b4edbfc01..278b3cb14 100644
--- a/download/index-3.4.t2t
+++ b/download/index-3.4.t2t
@@ -1,13 +1,6 @@
Grammatical Framework Download and Installation
-%!style:../css/style.css
-%!postproc(html):
-%!postproc(html):
-%!postproc(html):
-%!postproc(html):
-%!postproc(html):
-
**GF 3.4** was released on 31 January 2013.
What's new? See the [Release notes release-3.4.html].
@@ -20,14 +13,11 @@ What's new? See the [Release notes release-3.4.html].
| Fedora (32-bit) | [Fedora RPMs /~hallgren/tmp/Fedora/] | ``sudo rpm -i ...``
| Ubuntu (32-bit) | [gf_3.4-1_i386.deb gf_3.4-1_i386.deb] | ``sudo dpkg -i gf_3.4-1_i386.deb``
| Ubuntu (64-bit) | [gf_3.4-1_amd64.deb gf_3.4-1_amd64.deb] | ``sudo dpkg -i gf_3.4-1_amd64.deb``
-| Windows | [gf-3.4-bin-windows.zip gf-3.4-bin-windows.zip] |
-%| ... | ... | ...
-
-%More binary packages might be added later.
+| Windows | [gf-3.4-bin-windows.zip gf-3.4-bin-windows.zip] | -
===Notes===
-%The Windows package is installed by just unpacking it anywhere.
+%The Windows package is installed by just unpacking it anywhere.
%It finds the libraries relative to the ``.exe`` file.
The ``.deb`` packages work on Ubuntu 12.04, 12.10 and 13.04.
@@ -153,8 +143,3 @@ For more info on working with the GF source code, see the
- [GF 3.2 index-3.2.html] (December 2010).
- [GF 3.1.6 index-3.1.6.html] (April 2010).
- [GF 3.1 old-index.html] (December 2009).
-
-
---------------------
-
-[www.grammaticalframework.org http://www.grammaticalframework.org]
diff --git a/download/index-3.5.t2t b/download/index-3.5.t2t
index 9f1bbe4b2..0c1b2cafa 100644
--- a/download/index-3.5.t2t
+++ b/download/index-3.5.t2t
@@ -1,13 +1,6 @@
Grammatical Framework Download and Installation
-%!style:../css/style.css
-%!postproc(html):
-%!postproc(html):
-%!postproc(html):
-%!postproc(html):
-%!postproc(html):
-
**GF 3.5** was released on 6 August 2013.
What's new? See the [Release notes release-3.5.html].
@@ -157,8 +150,3 @@ For more info on working with the GF source code, see the
- [GF 3.2 index-3.2.html] (December 2010).
- [GF 3.1.6 index-3.1.6.html] (April 2010).
- [GF 3.1 old-index.html] (December 2009).
-
-
---------------------
-
-[www.grammaticalframework.org http://www.grammaticalframework.org]
diff --git a/download/index-3.6.t2t b/download/index-3.6.t2t
index 4ba7f67fd..9c4482e4d 100644
--- a/download/index-3.6.t2t
+++ b/download/index-3.6.t2t
@@ -1,13 +1,6 @@
Grammatical Framework Download and Installation
-%!style:../css/style.css
-%!postproc(html):
-%!postproc(html):
-%!postproc(html):
-%!postproc(html):
-%!postproc(html):
-
**GF 3.6** was released on 23 June 2014.
What's new? See the [Release notes release-3.6.html].
@@ -177,8 +170,3 @@ For more info on working with the GF source code, see the
- [GF 3.2 index-3.2.html] (December 2010).
- [GF 3.1.6 index-3.1.6.html] (April 2010).
- [GF 3.1 old-index.html] (December 2009).
-
-
---------------------
-
-[www.grammaticalframework.org http://www.grammaticalframework.org]
diff --git a/download/index-3.7.1.t2t b/download/index-3.7.1.t2t
index 0bbba65a9..b87f7d38c 100644
--- a/download/index-3.7.1.t2t
+++ b/download/index-3.7.1.t2t
@@ -1,12 +1,6 @@
Grammatical Framework Download and Installation
-%!style:../css/notes.css
-%!postproc(html):
-%!postproc(html):
-%!postproc(html):
-%!postproc(html):
-
**GF 3.7.1** was released on 2 October 2015.
What's new? See the [Release notes release-3.7.1.html].
@@ -46,7 +40,7 @@ The ``.deb`` packages work on Ubuntu 12.04, 14.04 and 15.04.
The packages for Mac OS X should work on at least 10.9, 10.10 and 10.11 (Mavericks, Yosemite and El Capitan).
-(*) **Note** that for compatibility with OS X 10.11,
+(*) **Note** that for compatibility with OS X 10.11,
``gf-3.7.1.pkg`` will install the ``gf`` executable in ``/usr/local/bin``
instead of ``/usr/bin``, so make sure ``/usr/local/bin`` is in your ``$PATH``.
Also, if you still have an older version of GF installed in ``/usr/bin``,
@@ -180,8 +174,3 @@ For more info on working with the GF source code, see the
- [GF 3.2 index-3.2.html] (December 2010).
- [GF 3.1.6 index-3.1.6.html] (April 2010).
- [GF 3.1 old-index.html] (December 2009).
-
-
---------------------
-
-[www.grammaticalframework.org http://www.grammaticalframework.org]
diff --git a/download/index-3.7.t2t b/download/index-3.7.t2t
index d234336e4..c59d04ebd 100644
--- a/download/index-3.7.t2t
+++ b/download/index-3.7.t2t
@@ -1,13 +1,6 @@
Grammatical Framework Download and Installation
-%!style:../css/style.css
-%!postproc(html):
-%!postproc(html):
-%!postproc(html):
-%!postproc(html):
-%!postproc(html):
-
**GF 3.7** was released on 25 June 2015.
What's new? See the [Release notes release-3.7.html].
@@ -173,8 +166,3 @@ For more info on working with the GF source code, see the
- [GF 3.2 index-3.2.html] (December 2010).
- [GF 3.1.6 index-3.1.6.html] (April 2010).
- [GF 3.1 old-index.html] (December 2009).
-
-
---------------------
-
-[www.grammaticalframework.org http://www.grammaticalframework.org]
diff --git a/download/index-3.8.t2t b/download/index-3.8.t2t
index 1222cdff1..9cafd548c 100644
--- a/download/index-3.8.t2t
+++ b/download/index-3.8.t2t
@@ -1,12 +1,6 @@
Grammatical Framework Download and Installation
-%!style:../css/notes.css
-%!postproc(html):
-%!postproc(html):
-%!postproc(html):
-%!postproc(html):
-
**GF 3.8** was released on 22 June 2016.
What's new? See the [Release notes release-3.8.html].
@@ -49,7 +43,7 @@ Linux distributions.
The packages for Mac OS X should work on at least 10.9, 10.10 and 10.11 (Mavericks, Yosemite and El Capitan).
-(*) **Note** that for compatibility with OS X 10.11,
+(*) **Note** that for compatibility with OS X 10.11,
``gf-3.8.pkg`` will install the ``gf`` executable in ``/usr/local/bin``
instead of ``/usr/bin``, so make sure ``/usr/local/bin`` is in your ``$PATH``.
Also, if you still have an older version of GF installed in ``/usr/bin``,
@@ -171,8 +165,3 @@ For more info on working with the GF source code, see the
- [GF 3.2 index-3.2.html] (December 2010).
- [GF 3.1.6 index-3.1.6.html] (April 2010).
- [GF 3.1 old-index.html] (December 2009).
-
-
---------------------
-
-[www.grammaticalframework.org http://www.grammaticalframework.org]
diff --git a/download/index.t2t b/download/index-3.9.t2t
similarity index 92%
rename from download/index.t2t
rename to download/index-3.9.t2t
index d39fffb17..9b3473a4d 100644
--- a/download/index.t2t
+++ b/download/index-3.9.t2t
@@ -1,12 +1,6 @@
Grammatical Framework Download and Installation
-%!style:../css/notes.css
-%!postproc(html):
-%!postproc(html):
-%!postproc(html):
-%!postproc(html):
-
**GF 3.9** was released on 11 August 2017.
What's new? See the [Release notes release-3.9.html].
@@ -18,10 +12,11 @@ What's new? See the [Release notes release-3.9.html].
| macOS | [gf-3.9.pkg gf-3.9.pkg] | //GF+S+C+J+P// | Double-click on the package icon
| macOS | [gf-3.9-bin-intel-mac.tar.gz gf-3.9-bin-intel-mac.tar.gz] | //GF+S+C+J+P// | ``sudo tar -C /usr/local -zxf gf-3.9-bin-intel-mac.tar.gz``
%| Fedora (32-bit) | [Fedora RPMs /~hallgren/tmp/Fedora/] | //GF+S+C+J+P// | ``sudo rpm -i ...``
-| Raspian 9.1 | [gf_3.9-1_armhf.deb gf_3.9-1_armhf.deb] | //GF+S+C+J+P// | ``sudo dpkg -i gf_3.9-1_armhf.deb``
+| Raspbian 9.1 | [gf_3.9-1_armhf.deb gf_3.9-1_armhf.deb] | //GF+S+C+J+P// | ``sudo dpkg -i gf_3.9-1_armhf.deb``
| Ubuntu (32-bit) | [gf_3.9-1_i386.deb gf_3.9-1_i386.deb] | //GF+S+C+J+P// | ``sudo dpkg -i gf_3.9-1_i386.deb``
| Ubuntu (64-bit) | [gf_3.9-1_amd64.deb gf_3.9-1_amd64.deb] | //GF+S+C+J+P// | ``sudo dpkg -i gf_3.9-1_amd64.deb``
| Windows | [gf-3.9-bin-windows.zip gf-3.9-bin-windows.zip] | //GF+S// | ``unzip gf-3.9-bin-windows.zip``
+
%| MINGW | [gf-3.9-bin-i686-MINGW32_NT-6.1.tar.gz gf-3.9-bin-i686-MINGW32_NT-6.1.tar.gz] | //GF+S+C// | ``tar -C / gf-3.9-bin-i686-MINGW32_NT-6.1.tar.gz``
%| ... | ... | ... | ...
@@ -176,6 +171,11 @@ The above notes for installing from source apply also in these cases.
For more info on working with the GF source code, see the
[GF Developers Guide ../doc/gf-developers.html].
+==Using Stack==
+
+You can also use [Stack https://www.haskellstack.org] to compile GF, just replace ``cabal install`` above
+with ``stack install`` (assuming you already have Stack set up).
+
==Older releases==
- [GF 3.8 index-3.8.html] (June 2016)
@@ -190,8 +190,3 @@ For more info on working with the GF source code, see the
- [GF 3.2 index-3.2.html] (December 2010).
- [GF 3.1.6 index-3.1.6.html] (April 2010).
- [GF 3.1 old-index.html] (December 2009).
-
-
---------------------
-
-[www.grammaticalframework.org http://www.grammaticalframework.org]
diff --git a/download/index.md b/download/index.md
new file mode 100644
index 000000000..e1d3322c1
--- /dev/null
+++ b/download/index.md
@@ -0,0 +1,187 @@
+---
+title: Grammatical Framework Download and Installation
+...
+
+**GF 3.10** was released on 2 December 2018.
+
+What's new? See the [release notes](release-3.10.html).
+
+## Binary packages
+
+These binary packages include both the GF core (compiler and runtime) as well as the pre-compiled RGL.
+
+| Platform | Download | Features | How to install |
+|:----------------|:---------------------------------------------------|:---------------|:-----------------------------------|
+| macOS | [gf-3.10.pkg](gf-3.10.pkg) | GF, S, C, J, P | Double-click on the package icon |
+| Raspbian 10 (buster) | [gf\_3.10-2\_armhf.deb](gf_3.10-2_armhf.deb) | GF,S,C,J,P | `sudo dpkg -i gf_3.10-2_armhf.deb` |
+| Ubuntu (32-bit) | [gf\_3.10-2\_i386.deb](gf_3.10-2_i386.deb) | GF, S, C, J, P | `sudo dpkg -i gf_3.10-2_i386.deb` |
+| Ubuntu (64-bit) | [gf\_3.10-2\_amd64.deb](gf_3.10-2_amd64.deb) | GF, S, C, J, P | `sudo dpkg -i gf_3.10-2_amd64.deb` |
+| Windows | [gf-3.10-bin-windows.zip](gf-3.10-bin-windows.zip) | GF, S | `unzip gf-3.10-bin-windows.zip` |
+
+
+
+**Features**
+
+- GF = GF shell and grammar compiler
+- S = `gf -server` mode
+- C = C run-time system
+- J/P = Java/Python binding to the C run-time system
+
+### Notes
+
+The Windows package is installed by just unpacking it anywhere. You will
+probably need to set the `PATH` and `GF_LIB_PATH` environment variables,
+see Inari's notes on [Installing GF on Windows](http://www.grammaticalframework.org/~inari/gf-windows.html#toc3).
+
+The Ubuntu `.deb` packages should work on Ubuntu 16.04 and 18.04 and
+similar Linux distributions. The `.deb` packages were updated
+to version 3.10-2 after the release of GF 3.10.
+(Because of a packaging bug the Resource Grammar Library was missing
+in the 3.10-1 packages.)
+
+
+
+The packages for macOS (Mac OS X) should work on at least 10.13 and
+10.14 (High Sierra and Mojave)
+
+
+
+## Installing the latest release from source
+
+[GF is on Hackage](http://hackage.haskell.org/package/gf), so under
+normal circumstances the procedure is fairly simple:
+
+1. Install a recent version of the [Haskell
+ Platform](http://hackage.haskell.org/platform) (see note below)
+2. `cabal update`
+3. On Linux: install some C libraries from your Linux distribution (see note below)
+4. `cabal install gf`
+
+This installs the GF executable and Haskell libraries, but **does not include the RGL**.
+
+You can also download the source code release from [GitHub](https://github.com/GrammaticalFramework/gf-core/releases),
+and follow the instructions below under **Installing from the latest developer source code**.
+
+### Notes
+
+**Installation location**
+
+The above steps installs GF for a single user. The executables are put
+in `$HOME/.cabal/bin` (or, with recent versions of the Haskell platform
+on Mac OS X, in `$HOME/Library/Haskell/bin`), so it is a good idea to
+put a line in your `.bash_profile` or `.profile` to add that directory
+to you path:
+
+```
+PATH=$HOME/.cabal/bin:$PATH
+```
+
+or
+
+```
+PATH=$HOME/Library/Haskell/bin:$PATH
+```
+
+**Build tools**
+
+In order to compile GF you need the build tools **Alex** and **Happy**.
+These can be installed via Cabal, e.g.:
+
+```
+cabal install alex happy
+```
+
+or obtained by other means, depending on your OS.
+
+**Haskeline**
+
+GF uses [`haskeline`](http://hackage.haskell.org/package/haskeline), which
+on Linux depends on some non-Haskell libraries that won't be installed
+automatically by cabal, and therefore need to be installed manually.
+Here is one way to do this:
+
+- On Ubuntu: `sudo apt-get install libghc-haskeline-dev`
+- On Fedora: `sudo yum install ghc-haskeline-devel`
+
+**GHC version**
+
+The GF source code has been updated to compile with GHC 8.4.
+Using older versions of GHC (e.g. 8.2, 8.0 and 7.10) should still work too.
+
+## Installing from the latest developer source code
+
+If you haven't already, clone the repository with:
+
+```
+git clone https://github.com/GrammaticalFramework/gf-core.git
+```
+
+If you've already cloned the repository previously, update with:
+
+```
+git pull
+```
+
+Then install with:
+
+```
+cabal install
+```
+
+or, if you're a Stack user:
+
+```
+stack install
+```
+
+The above notes for installing from source apply also in these cases.
+For more info on working with the GF source code, see the
+[GF Developers Guide](../doc/gf-developers.html).
+
+## Installing the RGL from source
+
+To install the RGL from source,
+you can download a release from [GitHub](https://github.com/GrammaticalFramework/gf-rgl/releases)
+or get the latest version by cloning the repository:
+
+```
+git clone https://github.com/GrammaticalFramework/gf-rgl.git
+```
+
+In both cases, once you have the RGL sources you can install them by running:
+
+```
+make
+```
+
+in the RGL folder.
+This assumes that you already have GF installed.
+For more details about building the RGL, see the [RGL README](https://github.com/GrammaticalFramework/gf-rgl/blob/master/README.md).
+
+## Older releases
+
+- [GF 3.9](index-3.9.html) (August 2017)
+- [GF 3.8](index-3.8.html) (June 2016)
+- [GF 3.7.1](index-3.7.1.html) (October 2015)
+- [GF 3.7](index-3.7.html) (June 2015)
+- [GF 3.6](index-3.6.html) (June 2014)
+- [GF 3.5](index-3.5.html) (August 2013)
+- [GF 3.4](index-3.4.html) (January 2013)
+- [GF 3.3.3](index-3.3.3.html) (March 2012)
+- [GF 3.3](index-3.3.html) (October 2011)
+- [GF 3.2.9](index-3.2.9.html) source-only snapshot (September 2011)
+- [GF 3.2](index-3.2.html) (December 2010)
+- [GF 3.1.6](index-3.1.6.html) (April 2010)
diff --git a/download/release-3.1.6.t2t b/download/release-3.1.6.t2t
index aec8bb650..c3500665a 100644
--- a/download/release-3.1.6.t2t
+++ b/download/release-3.1.6.t2t
@@ -5,13 +5,13 @@ GF Version 3.1.6 Release Notes
=Installation=
The binaries now work out of the box for each platform and support
-completions (file names and parsing), because readline has been
+completions (file names and parsing), because readline has been
changed to haskeline.
To compile from source, GHC 6.12 is now required. But GHC is not needed
if the binary executables are used.
-Binaries (``.gfo`` and ``.pgf`` files) compiled with GF 3.1 are incompatible
+Binaries (``.gfo`` and ``.pgf`` files) compiled with GF 3.1 are incompatible
with 3.1.6 and must either be removed; alternatively, the ``-src`` flag can be
used when compiling.
@@ -24,8 +24,8 @@ Grammar language
- improved support for dependent types (see ``SUMO``, ``nqueens`` in ``examples``)
-Shell commands and options (see ``help`` in GF for more information)
-- ``eb``: example-based grammar file conversion
+Shell commands and options (see ``help`` in GF for more information)
+- ``eb``: example-based grammar file conversion
(see ``examples/animals/QuestionI.gf``)
- ``vd = visualize_dependency``: show dependency tree
- ``vp = visualize_parse``: show parse tree
@@ -57,8 +57,3 @@ Internal
Javascript generation is not updated to the new PGF format.
[GF 3.1 old-index.html] should still be used for building Javascript applications.
-
-
-
-
-
diff --git a/download/release-3.10.md b/download/release-3.10.md
new file mode 100644
index 000000000..710390911
--- /dev/null
+++ b/download/release-3.10.md
@@ -0,0 +1,66 @@
+---
+title: GF 3.10 Release Notes
+date: 2 December 2018
+...
+
+## Installation
+
+See the [download page](index.html).
+
+## What's new
+
+In this release, the GF "core" (compiler and runtimes) and RGL have been split into separate repositories.
+The binary packages on the downloads page contain both GF and the RGL, but the sources are now separate:
+[gf-core](https://github.com/GrammaticalFramework/gf-core) and
+[gf-rgl](https://github.com/GrammaticalFramework/gf-rgl).
+
+Over 300 changes have been pushed to GF and over 600 changes have been made to the RGL
+since the release of GF 3.9 in August 2017.
+
+## General
+
+- Travis integration:
+GF [](https://travis-ci.org/GrammaticalFramework/gf-core) and
+RGL [](https://travis-ci.org/GrammaticalFramework/gf-rgl)
+- A lot of bug fixes and repository cleanup, including things moved to new repositories:
+ - [Phrasebook](https://github.com/GrammaticalFramework/gf-contrib/tree/master/phrasebook)
+ - [Wide coverage translator](https://github.com/GrammaticalFramework/wide-coverage)
+ - [Mobile apps](https://github.com/GrammaticalFramework/gf-offline-translator)
+ - [gftest](https://github.com/GrammaticalFramework/gftest)
+ - [gf-mode](https://github.com/GrammaticalFramework/gf-emacs-mode) for Emacs
+ - [RGL browser](https://github.com/GrammaticalFramework/rgl-source-browser) (live [here](http://www.grammaticalframework.org/~john/rgl-browser/))
+- A fresh look for the GF website.
+
+## GF compiler and run-time library
+
+- Extensive improvements in the C runtime and bindings to it from Python, Java, Haskell, C#
+- A GF shell which uses the C runtime
+- Better error messages
+- GF now has a Stack configuration file
+- The compiler source code has been updated for compatibility with GHC 8.4.3.
+- `GF_LIB_PATH` can now be `path1:path2:path3`, not just `path1`
+- Add TypeScript type definitions for `gflib.js`
+- New compiler/shell options
+ - added option `-output-format=java` for producing code for embedded grammars in Java
+ - `rf -paragraphs`
+ - `linearize -tabtreebank`
+ - A new function called `completions` is added in the Haskell runtime and used in PGFService. This makes the extraction of completions more platform independent
+
+## Resource Grammar Library
+
+- [Bash build script](https://github.com/GrammaticalFramework/gf-rgl/blob/master/Setup.sh), for building the RGL without Haskell
+- [Windows build script](https://github.com/GrammaticalFramework/gf-rgl/blob/master/Setup.bat), for building the RGL without Haskell on a regular Windows command shell
+- New languages:
+ - Basque
+ - Portuguese
+- Big progress with Arabic, Turkish, Persian
+- Introduction of `Extend` module to combine the functions of `Extra` and `Extensions` in a more disciplined way
+- Various fixes for several languages.
+- Various fixes in the translation dictionaries.
+
+## Apps and Cloud services
+
+- Sort list of public grammars by age by default
+- Browser compatibility fixes
+- Allow public grammars to be deleted in more cases
+- Show grammar comments in the list of public grammars
diff --git a/download/release-3.2.t2t b/download/release-3.2.t2t
index 3ad975697..60800adc9 100644
--- a/download/release-3.2.t2t
+++ b/download/release-3.2.t2t
@@ -1,9 +1,6 @@
GF Version 3.2 Release Notes
December 2010
-%!style:../css/style.css
-%!postproc(html):
-
==Installation==
See the [download page http://www.grammaticalframework.org/download/index.html].
@@ -34,8 +31,3 @@ See the [download page http://www.grammaticalframework.org/download/index.html].
- GF compiler: GPL
- Run-time libraries and Resource Grammar Library: LGPL + BSD
-
-
---------------------
-
-[www.grammaticalframework.org http://www.grammaticalframework.org]
diff --git a/download/release-3.3.3.t2t b/download/release-3.3.3.t2t
index 99387df01..c0f5fba98 100644
--- a/download/release-3.3.3.t2t
+++ b/download/release-3.3.3.t2t
@@ -1,10 +1,6 @@
GF Version 3.3.3 Release Notes
March 2012
-%!style:../css/style.css
-%!postproc(html):
-%!postproc(html):
-
==Installation==
See the [download page http://www.grammaticalframework.org/download/index.html].
@@ -25,8 +21,3 @@ See the [download page http://www.grammaticalframework.org/download/index.html].
- Fix for a bug that prevented the shell commands ``abstract_info``,
``generate_random`` and ``generate_trees`` from working properly.
- Various other small improvements and bug fixes.
-
-
---------------------
-
-[www.grammaticalframework.org http://www.grammaticalframework.org]
diff --git a/download/release-3.3.t2t b/download/release-3.3.t2t
index 00dd1bece..6c007afb5 100644
--- a/download/release-3.3.t2t
+++ b/download/release-3.3.t2t
@@ -1,17 +1,13 @@
GF Version 3.3 Release Notes
October 2011
-%!style:../css/style.css
-%!postproc(html):
-%!postproc(html):
-
==Installation==
See the [download page http://www.grammaticalframework.org/download/index.html].
==New features==
-- Source language extension: it is now possible to override the oper definitions in an interface, by using the
+- Source language extension: it is now possible to override the oper definitions in an interface, by using the
header syntax ``instance Foo of Bar - [f,g,h]``.
- New functionalities in GF shell commands (more information with ``help`` command-name).
- ``aw`` = ``align_words`` option ``-giza`` prints word alignments in Giza++ format.
@@ -29,8 +25,3 @@ See the [download page http://www.grammaticalframework.org/download/index.html].
and the web-based grammar editor.
- Faster grammar compilation (also included in the GF 3.2.9 source-only
snapshot).
-
-
---------------------
-
-[www.grammaticalframework.org http://www.grammaticalframework.org]
diff --git a/download/release-3.4.t2t b/download/release-3.4.t2t
index a317f7fd0..fa1f18a0a 100644
--- a/download/release-3.4.t2t
+++ b/download/release-3.4.t2t
@@ -1,10 +1,6 @@
GF Version 3.4 Release Notes
January 2013
-%!style:../css/style.css
-%!postproc(html):
-%!postproc(html):
-
==Installation==
See the [download page http://www.grammaticalframework.org/download/index.html].
@@ -46,8 +42,3 @@ See the [download page http://www.grammaticalframework.org/download/index.html].
- Some new functionality in the web-based grammar editor, e.g. preliminary
support for public grammars.
- Various other small improvements and bug fixes.
-
-
---------------------
-
-[www.grammaticalframework.org http://www.grammaticalframework.org]
diff --git a/download/release-3.5.t2t b/download/release-3.5.t2t
index c05c36f26..c9ffe568c 100644
--- a/download/release-3.5.t2t
+++ b/download/release-3.5.t2t
@@ -1,10 +1,6 @@
GF 3.5 Release Notes
August 2013
-%!style:../css/style.css
-%!postproc(html):
-%!postproc(html):
-
==Installation==
See the [download page http://www.grammaticalframework.org/download/index.html].
@@ -70,7 +66,3 @@ of GF 3.4.
[``network-2.4.1.1`` https://github.com/haskell/network/commit/f2168b1f8978b4ad9c504e545755f0795ac869ce].
- Various other small improvements and bug fixes.
%- [...]
-
---------------------
-
-[www.grammaticalframework.org http://www.grammaticalframework.org]
diff --git a/download/release-3.6.t2t b/download/release-3.6.t2t
index 084756a42..aa75f5d6a 100644
--- a/download/release-3.6.t2t
+++ b/download/release-3.6.t2t
@@ -1,10 +1,6 @@
GF 3.6 Release Notes
June 2014
-%!style:../css/style.css
-%!postproc(html):
-%!postproc(html):
-
==Installation==
See the [download page http://www.grammaticalframework.org/download/index.html].
@@ -26,7 +22,7 @@ Closed [issues http://code.google.com/p/grammatical-framework/issues/list]:
as ``--# -coding=``//enc//, instead of ``flags coding=``//enc//.
See the separate document
[GF character encoding changes encoding-change.html] for more details.
-- Record update: in record objects (but not types) of form ``r ** s``, the values assigned
+- Record update: in record objects (but not types) of form ``r ** s``, the values assigned
in ``s`` now overwrite those in ``r``. In previous versions, record extensions with
overlapping assignments in ``r`` and ``s`` were not supported, and their behaviour was
unpredictable.
@@ -107,8 +103,3 @@ Closed [issues http://code.google.com/p/grammatical-framework/issues/list]:
- ``c-wordforword``: this works as ``c-translate`` but does a
word-for-word lookup to create a (potentially very low quality)
translation that can be used if all else fails.
-
-
---------------------
-
-[www.grammaticalframework.org http://www.grammaticalframework.org]
diff --git a/download/release-3.7.1.t2t b/download/release-3.7.1.t2t
index 8b2a27aca..c31ee0d93 100644
--- a/download/release-3.7.1.t2t
+++ b/download/release-3.7.1.t2t
@@ -1,10 +1,6 @@
GF 3.7.1 Release Notes
October 2015
-%!style:../css/notes.css
-%!postproc(html):
-%!postproc(html):
-
==Installation==
See the [download page http://www.grammaticalframework.org/download/index.html].
@@ -18,10 +14,10 @@ Over 170 changes have been pushed to the source repository since
====New features and notable changes====
-- GF shell: ``cc -trace`` (preliminary): you can now do things like
-
+- GF shell: ``cc -trace`` (preliminary): you can now do things like
+
``cc -trace mkV "debug"``
-
+
to see a trace of all opers with their arguments and results during the
computation of ``mkV "debug"``.
@@ -29,7 +25,7 @@ Over 170 changes have been pushed to the source repository since
from the GF shell by starting GF with ``gf -cshell`` or ``gf -crun``.
Only limited functionality is available when running the shell in these
modes (use the ``help`` command in the shell for details):
-
+
- You can only import ``.pgf`` files, not source files.
- The ``-retain`` flag can not be used and the commands that require it to
work are not available.
@@ -77,8 +73,3 @@ Over 170 changes have been pushed to the source repository since
you can leave ``&+`` uninterpreted instead of gluing the adjacent tokens.
This means that the output is left in a format that can be parsed in
a subsequent request.
-
-
---------------------
-
-[www.grammaticalframework.org http://www.grammaticalframework.org]
diff --git a/download/release-3.7.t2t b/download/release-3.7.t2t
index 8b9badf50..73c52d974 100644
--- a/download/release-3.7.t2t
+++ b/download/release-3.7.t2t
@@ -1,10 +1,6 @@
GF 3.7 Release Notes
June 2015
-%!style:../css/style.css
-%!postproc(html):
-%!postproc(html):
-
==Installation==
See the [download page http://www.grammaticalframework.org/download/index.html].
@@ -40,7 +36,7 @@ Over 800 changes have been pushed to the source repository since
``BIND``, ``SOFT_BIND``, ``SOFT_SPACE``, ``CAPIT``, ``ALL_CAPIT`` and
``nonExist``.
- It is now possible to define callbacks for literals from the Haskell
- binding to the C runtime. This is used for instance in
+ binding to the C runtime. This is used for instance in
the Wide Coverage translator on the Web.
@@ -103,8 +99,3 @@ Over 800 changes have been pushed to the source repository since
unused for 24 hours, to keep memory use down in long running servers.
- PGF service: limit the number of parallel calls to the C run-time parse
function to 4 by default. The limit can be changed with the ``-j`` flag.
-
-
---------------------
-
-[www.grammaticalframework.org http://www.grammaticalframework.org]
diff --git a/download/release-3.8.t2t b/download/release-3.8.t2t
index 669f1235b..05b7b7303 100644
--- a/download/release-3.8.t2t
+++ b/download/release-3.8.t2t
@@ -1,10 +1,6 @@
GF 3.8 Release Notes
June 2016
-%!style:../css/notes.css
-%!postproc(html):
-%!postproc(html):
-
==Installation==
See the [download page http://www.grammaticalframework.org/download/index.html].
@@ -25,7 +21,7 @@ Roughly 400 changes have been pushed to the source repository since
[universal dependency http://universaldependencies.org/] diagrams
in various formats, see ``help vd``.
- The C runtime now includes an experimental library for managing
- and querying ontologies built on top of the abstract syntax of
+ and querying ontologies built on top of the abstract syntax of
a grammar. Since the ontology is based on an abstract syntax,
it is language independent by design. For now the library is
only used in the GF Offline Translator. The library uses
@@ -100,7 +96,3 @@ Roughly 400 changes have been pushed to the source repository since
translations in the domain they cover.
You can change the order in which the selected grammars are tried
by dragging them up and down in the list.
-
---------------------
-
-[www.grammaticalframework.org http://www.grammaticalframework.org]
diff --git a/download/release-3.9.t2t b/download/release-3.9.t2t
index fc1e71447..c6277adfb 100644
--- a/download/release-3.9.t2t
+++ b/download/release-3.9.t2t
@@ -1,10 +1,6 @@
GF 3.9 Release Notes
August 2017
-%!style:../css/notes.css
-%!postproc(html):
-%!postproc(html):
-
==Installation==
See the [download page http://www.grammaticalframework.org/download/index.html].
@@ -36,7 +32,7 @@ very innefficient for some grammars.
- A new .NET binding for the GF runtime is available.
-- The API in the Java binding is extended and it covers more from
+- The API in the Java binding is extended and it covers more from
the full functionality of the C runtime.
@@ -66,8 +62,3 @@ the full functionality of the C runtime.
- PGF service: support for language-specific depencency configurations in
``command=deptree``.
-
-
---------------------
-
-[www.grammaticalframework.org http://www.grammaticalframework.org]
diff --git a/favicon.ico b/favicon.ico
new file mode 100644
index 000000000..5545e6277
Binary files /dev/null and b/favicon.ico differ
diff --git a/gf.cabal b/gf.cabal
index 11830ddf0..4b4b01d86 100644
--- a/gf.cabal
+++ b/gf.cabal
@@ -1,5 +1,5 @@
name: gf
-version: 3.9-git
+version: 3.10.3-git
cabal-version: >= 1.22
build-type: Custom
@@ -11,7 +11,7 @@ description: GF, Grammatical Framework, is a programming language for multilingu
homepage: http://www.grammaticalframework.org/
bug-reports: https://github.com/GrammaticalFramework/gf-core/issues
maintainer: Thomas Hallgren
-tested-with: GHC==7.6.3, GHC==7.8.3, GHC==7.10.3, GHC==8.0.2
+tested-with: GHC==7.10.3, GHC==8.0.2, GHC==8.2.2, GHC==8.4.3
data-dir: src
data-files:
@@ -42,7 +42,7 @@ data-files:
custom-setup
setup-depends:
base,
- Cabal >=1.4.0.0,
+ Cabal >=1.22.0.0,
directory,
filepath,
process >=1.0.1.1
@@ -67,13 +67,23 @@ flag network-uri
description: Get Network.URI from the network-uri package
default: True
-
executable gf
hs-source-dirs: src/programs
main-is: gf-main.hs
default-language: Haskell2010
- build-depends: pgf2, base, filepath, directory, time, time-compat, old-locale, pretty, mtl, array, random,
- process, haskeline, parallel>=3, exceptions, bytestring, utf8-string, containers
+ build-depends: pgf2,
+ base >= 4.6 && <5,
+ array,
+ containers,
+ bytestring,
+ utf8-string,
+ random,
+ pretty,
+ mtl,
+ exceptions,
+ ghc-prim,
+ filepath, directory>=1.2, time,
+ process, haskeline, parallel>=3, json
ghc-options: -threaded
if impl(ghc>=7.0)
@@ -89,6 +99,9 @@ executable gf
GF
GF.Support
GF.Text.Pretty
+ GF.Text.Lexing
+ GF.Grammar.Canonical
+
GF.Main GF.Compiler GF.Interactive
GF.Compile GF.CompileInParallel GF.CompileOne GF.Compile.GetGrammar
@@ -125,9 +138,9 @@ executable gf
GF.Compile.PGFtoJava
GF.Haskell
GF.Compile.ConcreteToHaskell
- GF.Compile.PGFtoJS
- GF.Compile.PGFtoProlog
- GF.Compile.PGFtoPython
+ GF.Compile.GrammarToCanonical
+ GF.Grammar.CanonicalJSON
+ GF.Compile.PGFtoJSON
GF.Compile.ReadFiles
GF.Compile.Rename
GF.Compile.SubExOpt
@@ -210,7 +223,7 @@ executable gf
Data.Binary.IEEE754
if flag(server)
- build-depends: httpd-shed>=0.4.0.3, network>=2.3 && <2.7, json,
+ build-depends: httpd-shed>=0.4.0.3, network>=2.3 && <2.7,
cgi>=3001.2.2.0
if flag(network-uri)
build-depends: network-uri>=2.6, network>=2.6
@@ -230,8 +243,6 @@ executable gf
CGIUtils
Cache
Fold
- ExampleDemo
- ExampleService
hs-source-dirs: src/server src/server/transfer src/example-based
if flag(interrupt)
diff --git a/index.html b/index.html
index 78d263be5..b053de68b 100644
--- a/index.html
+++ b/index.html
@@ -1,418 +1,390 @@
-
-
-
-GF - Grammatical Framework
-
-
-
-
-
-
-
+
+
+
+
-
+ GF - Grammatical Framework
+
-
-
-
Grammatical Framework
- A programming language for multilingual grammar applications
+
+
+
+
+
+
+
+
+
+
+
+
+
Grammatical Framework
+
A programming language for multilingual grammar applications
Summer School in Rule-Based Machine
- Translation in Alacant/Alicante (Spain), 11-21 July 2016
- featuring GF, Apertium, Matxin, and TectoMT.
- Summer
- School web page.
-
GF, Grammatical Framework, is a programming language for
-multilingual grammar applications. It is
-
-
-
a special-purpose language for grammars, like
+multilingual grammar applications. It is
+
+
+
a special-purpose language for grammars, like
YACC,
Bison,
Happy,
BNFC,
but not restricted to programming languages
-
a functional programming language, like
+
+
a functional programming language, like
Haskell,
Lisp,
OCaml,
SML,
Scheme,
but specialized to grammar writing
-
a development platform for natural language grammars, like
+
+
a development platform for natural language grammars, like
LKB,
XLE,
Regulus,
but based on functional programming and type theory
-
a categorial grammar formalism, like
+
+
a categorial grammar formalism, like
ACG,
CCG,
but specialized for multilingual grammars,
-
a logical framework, like
+
+
a logical framework, like
Agda,
Coq,
Isabelle,
but equipped with concrete syntax in addition to logic
-
a platform for machine translation, like
+
+
a platform for machine translation, like
Moses,
Apertium,
but based on deep structural analysis (and usually applied for
limited fragments of language).
-
+
+
-
+
Don't worry if you don't know most of the references above - but if you do know at
least one, it may help you to get a first idea of what GF is.
-
-GF is open-source, licensed under GPL (the program) and
-LGPL and BSD (the libraries). It
-is available for
-
-
-
Linux
-
Mac OS X
-
Windows
-
Android mobile platform (via Java; runtime)
-
via compilation to JavaScript, almost any platform that has a web browser (runtime)
-
-
-
Projects
-
-GF was first created in 1998 at
-Xerox Research Centre Europe,
-Grenoble, in the project
-Multilingual Document Authoring. At Xerox, it was used for prototypes including
-a restaurant phrase book,
-a database query system,
-a formalization of an alarm system instructions with translations to 5 languages, and
-an authoring system for medical drug descriptions.
-
-
-Later projects using GF and involving third parties include, in chronological order,
-
-
-
GF-Alfa:
- natural language interface to formal proofs
-
Efficient:
- authoring tool for business models.
-
GF-KeY:
- authoring and translation of software specifications
-
TALK:
- multilingual and multimodal spoken dialogue systems
-
WebALT:
- multilingual generation of mathematical exercises (commercial project)
-
-Academically, GF has been used in at least ten PhD theses and resulted
-in more than a hundred
-scientific publications (see GF publication list).
-
-
Programming in GF
-
-GF is easy to learn by following the tutorial.
-You can write your first translator in 15 minutes.
-
-
-GF has an interactive command interpreter, as well as a batch compiler.
-Grammars can be compiled to parser and translator code in many different
-formats. These components can then be embedded in applications written
-in other programming languages. The formats currently supported are:
-
-
-
Haskell
-
Java, in particular the Android platform
-
JavaScript
-
Speech recognition: HTK/ATK, Nuance, JSGF
-
-
-
-The GF programming language is high-level and advanced, featuring
-
-
-
static type checking
-
higher-order functions
-
dependent types
-
pattern matching with data constructors and regular expressions
-
module system with multiple inheritance and parametrized modules
-
-
-
Getting help
-
-If you need some help with GF, the first places to start are the Tutorial and Reference pages.
-The printed book contains all the material in the tutorial and some extra bits, and is the recommended reference for GF.
-
-We run the IRC channel #gf on the Freenode network, where you are welcome to look for help with small questions or just start a general discussion.
-IRC logs (in raw format) are available here.
-If you have a larger question which the community may benefit from, we recommend you ask it on the mailing list.
-
+
-
Libraries
-
-Libraries are at the heart of modern software engineering. In natural language
-applications, libraries are a way to cope with thousands of details involved in
-syntax, lexicon, and inflection. The
-GF resource grammar library has
-support for an increasing number of languages, currently including
-
+ GF is open-source, licensed under GPL (the program) and
+ LGPL and BSD (the libraries). It
+ is available for
+
+
+
Linux
+
macOS
+
Windows
+
Android mobile platform (via Java; runtime)
+
via compilation to JavaScript, almost any platform that has a web browser (runtime)
+
+
+
Programming in GF
+
+ GF is easy to learn by following the tutorial.
+ You can write your first translator in 15 minutes.
+
+
+ GF has an interactive command interpreter, as well as a batch compiler.
+ Grammars can be compiled to parser and translator code in many different
+ formats. These components can then be embedded in applications written
+ in other programming languages. The formats currently supported are:
+
+
+
Haskell
+
Java, in particular the Android platform
+
JavaScript
+
Speech recognition: HTK/ATK, Nuance, JSGF
+
+
+
+ The GF programming language is high-level and advanced, featuring:
+
+
+
static type checking
+
higher-order functions
+
dependent types
+
pattern matching with data constructors and regular expressions
+
module system with multiple inheritance and parametrized modules
+
+
+
Getting help
+
+ If you need some help with GF, the first places to start are the Tutorial and Reference pages.
+ The printed book contains all the material in the tutorial and some extra bits, and is the recommended reference for GF.
+
+
+
+ We run the IRC channel #gf on the Freenode network, where you are welcome to look for help with small questions or just start a general discussion.
+ You can open a web chat
+ or browse the channel logs.
+
+
+ If you have a larger question which the community may benefit from, we recommend you ask it on the mailing list.
+
+ GF was first created in 1998 at
+ Xerox Research Centre Europe,
+ Grenoble, in the project
+ Multilingual Document Authoring. At Xerox, it was used for prototypes including
+ a restaurant phrase book,
+ a database query system,
+ a formalization of an alarm system instructions with translations to 5 languages, and
+ an authoring system for medical drug descriptions.
+
+
+ Later projects using GF and involving third parties include, in chronological order:
+
+
+
+ GF-Alfa:
+ natural language interface to formal proofs
+
+
+ Efficient:
+ authoring tool for business models.
+
+
+ GF-KeY:
+ authoring and translation of software specifications
+
+
+ TALK:
+ multilingual and multimodal spoken dialogue systems
+
+ REMU:
+ reliable multilingual digital communication
+
+
+
+
+
+ Academically, GF has been used in at least ten PhD theses and resulted
+ in more than a hundred scientific publications.
+
+
+
+
Libraries
+
+ Libraries are at the heart of modern software engineering. In natural language
+ applications, libraries are a way to cope with thousands of details involved in
+ syntax, lexicon, and inflection. The
+ GF resource grammar library has
+ support for an increasing number of languages, currently including
+ Afrikaans,
+ Amharic (partial),
+ Arabic (partial),
+ Bulgarian,
+ Catalan,
+ Chinese,
+ Danish,
+ Dutch,
+ English,
+ Estonian,
+ Finnish,
+ French,
+ German,
+ Greek ancient (partial),
+ Greek modern,
+ Hebrew (fragments),
+ Hindi,
+ Interlingua,
+ Japanese,
+ Italian,
+ Latin (fragments),
+ Latvian,
+ Maltese,
+ Mongolian,
+ Nepali,
+ Norwegian bokmål,
+ Norwegian nynorsk,
+ Persian,
+ Polish,
+ Punjabi,
+ Romanian,
+ Russian,
+ Sindhi,
+ Slovene (partial),
+ Spanish,
+ Swahili (fragments),
+ Swedish,
+ Thai,
+ Turkish (fragments),
+ and
+ Urdu.
+
+
+
+ Adding a language to the resource library takes 3 to 9
+ months - contributions
+ are welcome! You can start with the resource grammarian's tutorial.
+
+
+
+
+
+
+
+
+
-
-
+
+
+
diff --git a/src/compiler/GF.hs b/src/compiler/GF.hs
index 8938a053e..a99970a57 100644
--- a/src/compiler/GF.hs
+++ b/src/compiler/GF.hs
@@ -19,7 +19,9 @@ module GF(
module GF.Grammar.Printer,
module GF.Infra.Ident,
-- ** Binary serialisation
- module GF.Grammar.Binary
+ module GF.Grammar.Binary,
+ -- * Canonical GF
+ module GF.Compile.GrammarToCanonical
) where
import GF.Main
import GF.Compiler
@@ -36,3 +38,5 @@ import GF.Grammar.Macros
import GF.Grammar.Printer
import GF.Infra.Ident
import GF.Grammar.Binary
+
+import GF.Compile.GrammarToCanonical
diff --git a/src/compiler/GF/Compile/CheckGrammar.hs b/src/compiler/GF/Compile/CheckGrammar.hs
index 5c1743b74..8d7021df0 100644
--- a/src/compiler/GF/Compile/CheckGrammar.hs
+++ b/src/compiler/GF/Compile/CheckGrammar.hs
@@ -146,11 +146,17 @@ checkCompleteGrammar opts cwd gr (am,abs) (cm,cnc) = checkInModule cwd cnc NoLoc
return $ updateTree (c,CncFun (Just linty) d mn mf) js
_ -> do checkWarn ("function" <+> c <+> "is not in abstract")
return js
- CncCat _ _ _ _ _ -> case lookupOrigInfo gr (am,c) of
- Ok _ -> return $ updateTree i js
- _ -> do checkWarn ("category" <+> c <+> "is not in abstract")
- return js
- _ -> return $ updateTree i js
+ CncCat {} ->
+ case lookupOrigInfo gr (am,c) of
+ Ok (_,AbsCat _) -> return $ updateTree i js
+ {- -- This might be too pedantic:
+ Ok (_,AbsFun {}) ->
+ checkError ("lincat:"<+>c<+>"is a fun, not a cat")
+ -}
+ _ -> do checkWarn ("category" <+> c <+> "is not in abstract")
+ return js
+
+ _ -> return $ updateTree i js
-- | General Principle: only Just-values are checked.
diff --git a/src/compiler/GF/Compile/ConcreteToHaskell.hs b/src/compiler/GF/Compile/ConcreteToHaskell.hs
index ad4775697..d74fcdacd 100644
--- a/src/compiler/GF/Compile/ConcreteToHaskell.hs
+++ b/src/compiler/GF/Compile/ConcreteToHaskell.hs
@@ -1,365 +1,351 @@
-- | Translate concrete syntax to Haskell
module GF.Compile.ConcreteToHaskell(concretes2haskell,concrete2haskell) where
-import Data.List(sort,sortBy)
-import Data.Function(on)
+import Data.List(isPrefixOf,sort,sortOn)
import qualified Data.Map as M
import qualified Data.Set as S
-import GF.Data.ErrM
-import GF.Data.Utilities(mapSnd)
import GF.Text.Pretty
-import GF.Grammar.Grammar
-import GF.Grammar.Lookup(lookupFunType,lookupOrigInfo,allOrigInfos)--,allParamValues
-import GF.Grammar.Macros(typeForm,collectOp,collectPattOp,mkAbs,mkApp)
-import GF.Grammar.Lockfield(isLockLabel)
-import GF.Grammar.Predef(cPredef,cInts)
-import GF.Compile.Compute.Predef(predef)
-import GF.Compile.Compute.Value(Predefined(..))
-import GF.Infra.Ident(Ident,identS,prefixIdent) --,moduleNameS
+--import GF.Grammar.Predef(cPredef,cInts)
+--import GF.Compile.Compute.Predef(predef)
+--import GF.Compile.Compute.Value(Predefined(..))
+import GF.Infra.Ident(Ident,identS,identW,prefixIdent)
import GF.Infra.Option
-import GF.Compile.Compute.ConcreteNew(normalForm,resourceValues)
-import GF.Haskell
-import Debug.Trace
+import GF.Haskell as H
+import GF.Grammar.Canonical as C
+import GF.Compile.GrammarToCanonical
+import Debug.Trace(trace)
-- | Generate Haskell code for the all concrete syntaxes associated with
-- the named abstract syntax in given the grammar.
concretes2haskell opts absname gr =
- [(cncname,concrete2haskell opts gr cenv absname cnc cncmod)
- | let cenv = resourceValues opts gr,
- cnc<-allConcretes gr absname,
- let cncname = render cnc ++ ".hs" :: FilePath
- Ok cncmod = lookupModule gr cnc
+ [(filename,render80 $ concrete2haskell opts abstr cncmod)
+ | let Grammar abstr cncs = grammar2canonical opts absname gr,
+ cncmod<-cncs,
+ let ModId name = concName cncmod
+ filename = name ++ ".hs" :: FilePath
]
-- | Generate Haskell code for the given concrete module.
-- The only options that make a difference are
-- @-haskell=noprefix@ and @-haskell=variants@.
-concrete2haskell opts gr cenv absname cnc modinfo =
- renderStyle style{lineLength=80,ribbonsPerLine=1} $
- haskPreamble va absname cnc $$ vcat (
- nl:Comment "--- Parameter types ---":
- neededParamTypes S.empty (params defs) ++
- nl:Comment "--- Type signatures for linearization functions ---":
- map signature (S.toList allcats)++
- nl:Comment "--- Linearization functions for empty categories ---":
- emptydefs ++
- nl:Comment "--- Linearization types and linearization functions ---":
- map ppDef defs ++
- nl:Comment "--- Type classes for projection functions ---":
- map labelClass (S.toList labels) ++
- nl:Comment "--- Record types ---":
- concatMap recordType recs)
+concrete2haskell opts
+ abstr@(Abstract _ _ cats funs)
+ modinfo@(Concrete cnc absname _ ps lcs lns) =
+ haskPreamble absname cnc $$
+ vcat (
+ nl:Comment "--- Parameter types ---":
+ map paramDef ps ++
+ nl:Comment "--- Type signatures for linearization functions ---":
+ map signature cats ++
+ nl:Comment "--- Linearization functions for empty categories ---":
+ emptydefs ++
+ nl:Comment "--- Linearization types ---":
+ map lincatDef lcs ++
+ nl:Comment "--- Linearization functions ---":
+ lindefs ++
+ nl:Comment "--- Type classes for projection functions ---":
+ map labelClass (S.toList labels) ++
+ nl:Comment "--- Record types ---":
+ concatMap recordType recs)
where
nl = Comment ""
+ recs = S.toList (S.difference (records (lcs,lns)) common_records)
+
labels = S.difference (S.unions (map S.fromList recs)) common_labels
- recs = S.toList (S.difference (records rhss) common_records)
common_records = S.fromList [[label_s]]
common_labels = S.fromList [label_s]
- label_s = ident2label (identS "s")
+ label_s = LabelId "s"
- rhss = map (either snd (snd.snd)) defs
- defs = sortBy (compare `on` either (const Nothing) (Just . fst)) .
- concatMap (toHaskell gId gr absname cenv) .
- M.toList $
- jments modinfo
-
--- signature c = "lin"<>c<+>"::"<+>"A."<>gId c<+>"->"<+>"Lin"<>c
--- signature c = "--lin"<>c<+>":: (Applicative f,Monad f) =>"<+>"A."<>gId c<+>"->"<+>"f Lin"<>c
- signature c = TypeSig lf (Fun abs (pure lin))
+ signature (CatDef c _) = TypeSig lf (Fun abs (pure lin))
where
abs = tcon0 (prefixIdent "A." (gId c))
lin = tcon0 lc
- lf = prefixIdent "lin" c
- lc = prefixIdent "Lin" c
+ lf = linfunName c
+ lc = lincatName c
emptydefs = map emptydef (S.toList emptyCats)
- emptydef c = Eqn (prefixIdent "lin" c,[WildP]) (Const "undefined")
+ emptydef c = Eqn (linfunName c,[WildP]) (Const "undefined")
- emptyCats = allcats `S.difference` cats
- cats = S.fromList [c|Right (c,_)<-defs]
- allcats = S.fromList [c|((_,c),AbsCat (Just _))<-allOrigInfos gr absname]
+ emptyCats = allcats `S.difference` linfuncats
+ where
+ --funcats = S.fromList [c | FunDef f (C.Type _ (TypeApp c _))<-funs]
+ allcats = S.fromList [c | CatDef c _<-cats]
+
+ gId :: ToIdent i => i -> Ident
+ gId = (if haskellOption opts HaskellNoPrefix then id else prefixIdent "G")
+ . toIdent
- params = S.toList . S.unions . map params1
- params1 (Left (_,rhs)) = paramTypes gr rhs
- params1 (Right (_,(_,rhs))) = tableTypes gr [rhs]
-
- ppDef (Left (lhs,rhs)) = lhs (convType va gId rhs)
- ppDef (Right (_,(lhs,rhs))) = lhs (convert va gId gr rhs)
-
- gId :: Ident -> Ident
- gId = if haskellOption opts HaskellNoPrefix then id else prefixIdent "G"
va = haskellOption opts HaskellVariants
pure = if va then ListT else id
- neededParamTypes have [] = []
- neededParamTypes have (q:qs) =
- if q `S.member` have
- then neededParamTypes have qs
- else let ((got,need),def) = paramType va gId gr q
- in def++neededParamTypes (S.union got have) (S.toList need++qs)
-
-haskPreamble :: Bool -> ModuleName -> ModuleName -> Doc
-haskPreamble va absname cncname =
- "{-# LANGUAGE MultiParamTypeClasses, FunctionalDependencies, FlexibleInstances, LambdaCase #-}" $$
- "module" <+> cncname <+> "where" $$
- "import Prelude hiding (Ordering(..))" $$
- "import Control.Applicative((<$>),(<*>))" $$
- "import PGF.Haskell" $$
- "import qualified" <+> absname <+> "as A" $$
- "" $$
- "--- Standard definitions ---" $$
- "linString (A.GString s) ="<+>pure "R_s [TK s]" $$
- "linInt (A.GInt i) ="<+>pure "R_s [TK (show i)]" $$
- "linFloat (A.GFloat x) ="<+>pure "R_s [TK (show x)]" $$
- "" $$
- "----------------------------------------------------" $$
- "-- Automatic translation from GF to Haskell follows" $$
- "----------------------------------------------------"
- where
- pure = if va then brackets else pp
-
-toHaskell gId gr absname cenv (name,jment) =
- case jment of
- CncCat (Just (L loc typ)) _ _ pprn _ ->
- [Left (tsyn0 (prefixIdent "Lin" name),nf loc typ)]
- CncFun (Just r@(cat,ctx,lincat)) (Just (L loc def)) pprn _ ->
--- trace (render (name<+>hcat[parens (x<>"::"<>t)|(_,x,t)<-ctx]<+>"::"<+>cat)) $
- [Right (cat,(Eqn (prefixIdent "lin" cat,lhs),coerce [] lincat rhs))]
+ haskPreamble :: ModId -> ModId -> Doc
+ haskPreamble absname cncname =
+ "{-# LANGUAGE MultiParamTypeClasses, FunctionalDependencies, FlexibleInstances, LambdaCase #-}" $$
+ "module" <+> cncname <+> "where" $$
+ "import Prelude hiding (Ordering(..))" $$
+ "import Control.Applicative((<$>),(<*>))" $$
+ "import PGF.Haskell" $$
+ "import qualified" <+> absname <+> "as A" $$
+ "" $$
+ "--- Standard definitions ---" $$
+ "linString (A.GString s) ="<+>pure "R_s [TK s]" $$
+ "linInt (A.GInt i) ="<+>pure "R_s [TK (show i)]" $$
+ "linFloat (A.GFloat x) ="<+>pure "R_s [TK (show x)]" $$
+ "" $$
+ "----------------------------------------------------" $$
+ "-- Automatic translation from GF to Haskell follows" $$
+ "----------------------------------------------------"
where
- Ok abstype = lookupFunType gr absname name
- (absctx,_abscat,_absargs) = typeForm abstype
+ pure = if va then brackets else pp
- e' = unAbs (length params) $
- nf loc (mkAbs params (mkApp def (map Vr args)))
- params = [(b,prefixIdent "g" x)|(b,x,_)<-ctx]
- args = map snd params
- abs_args = map (prefixIdent "abs_") args
- lhs = [ConP (aId name) (map VarP abs_args)]
- rhs = foldr letlin e' (zip args absctx)
- letlin (a,(_,_,at)) =
- Let (a,(Just (con ("Lin"++render at)),(App (con ("lin"++render at)) (con ("abs_"++render a)))))
- AnyInd _ m -> case lookupOrigInfo gr (m,name) of
- Ok (m,jment) -> toHaskell gId gr absname cenv (name,jment)
- _ -> []
- _ -> []
- where
- nf loc = normalForm cenv (L loc name)
- aId n = prefixIdent "A." (gId n)
+ paramDef pd =
+ case pd of
+ ParamAliasDef p t -> H.Type (conap0 (gId p)) (convLinType t)
+ ParamDef p pvs -> Data (conap0 (gId p)) (map paramCon pvs) derive
+ where
+ paramCon (Param c cs) = ConAp (gId c) (map (tcon0.gId) cs)
+ derive = ["Eq","Ord","Show"]
- unAbs 0 t = t
- unAbs n (Abs _ _ t) = unAbs (n-1) t
- unAbs _ t = t
+ convLinType = ppT
+ where
+ ppT t =
+ case t of
+ FloatType -> tcon0 (identS "Float")
+ IntType -> tcon0 (identS "Int")
+ ParamType (ParamTypeId p) -> tcon0 (gId p)
+ RecordType rs -> tcon (rcon' ls) (map ppT ts)
+ where (ls,ts) = unzip $ sortOn fst [(l,t)|RecordRow l t<-rs]
+ StrType -> tcon0 (identS "Str")
+ TableType pt lt -> Fun (ppT pt) (ppT lt)
+-- TupleType lts ->
+
+ lincatDef (LincatDef c t) = tsyn0 (lincatName c) (convLinType t)
+
+ linfuncats = S.fromList linfuncatl
+ (linfuncatl,lindefs) = unzip (linDefs lns)
+
+ linDefs = map eqn . sortOn fst . map linDef
+ where eqn (cat,(f,(ps,rhs))) = (cat,Eqn (f,ps) rhs)
+
+ linDef (LinDef f xs rhs0) =
+ (cat,(linfunName cat,(lhs,rhs)))
+ where
+ lhs = [ConP (aId f) (map VarP abs_args)]
+ aId f = prefixIdent "A." (gId f)
+
+ [lincat] = [lincat | LincatDef c lincat<-lcs,c==cat]
+ [C.Type absctx (TypeApp cat _)] = [t | FunDef f' t<-funs, f'==f]
+
+ abs_args = map abs_arg args
+ abs_arg = prefixIdent "abs_"
+ args = map (prefixIdent "g" . toIdent) xs
+
+ rhs = lets (zipWith letlin args absctx)
+ (convert vs (coerce env lincat rhs0))
+ where
+ vs = [(VarValueId (Unqual x),a)|(VarId x,a)<-zip xs args]
+ env= [(VarValueId (Unqual x),lc)|(VarId x,lc)<-zip xs (map arglincat absctx)]
+
+ letlin a (TypeBinding _ (C.Type _ (TypeApp acat _))) =
+ (a,Ap (Var (linfunName acat)) (Var (abs_arg a)))
+
+ arglincat (TypeBinding _ (C.Type _ (TypeApp acat _))) = lincat
+ where
+ [lincat] = [lincat | LincatDef c lincat<-lcs,c==acat]
+
+ convert = convert' va
+
+ convert' va vs = ppT
+ where
+ ppT0 = convert' False vs
+ ppTv vs' = convert' va vs'
+
+ pure = if va then single else id
+
+ ppT t =
+ case t of
+ TableValue ty cs -> pure (table cs)
+ Selection t p -> select (ppT t) (ppT p)
+ ConcatValue t1 t2 -> concat (ppT t1) (ppT t2)
+ RecordValue r -> aps (rcon ls) (map ppT ts)
+ where (ls,ts) = unzip $ sortOn fst [(l,t)|RecordRow l t<-r]
+ PredefValue p -> single (Var (toIdent p)) -- hmm
+ Projection t l -> ap (proj l) (ppT t)
+ VariantValue [] -> empty
+ VariantValue ts@(_:_) -> variants ts
+ VarValue x -> maybe (Var (gId x)) (pure . Var) $ lookup x vs
+ PreValue vs t' -> pure (alts t' vs)
+ ParamConstant (Param c vs) -> aps (Var (pId c)) (map ppT vs)
+ ErrorValue s -> ap (Const "error") (Const (show s)) -- !!
+ LiteralValue l -> ppL l
+ _ -> error ("convert "++show t)
+
+ ppL l =
+ case l of
+ FloatConstant x -> pure (lit x)
+ IntConstant n -> pure (lit n)
+ StrConstant s -> pure (token s)
+
+ pId p@(ParamId s) =
+ if "to_R_" `isPrefixOf` unqual s then toIdent p else gId p -- !! a hack
+
+ table cs =
+ if all (null.patVars) ps
+ then lets ds (LambdaCase [(ppP p,t')|(p,t')<-zip ps ts'])
+ else LambdaCase (map ppCase cs)
+ where
+ (ds,ts') = dedup ts
+ (ps,ts) = unzip [(p,t)|TableRow p t<-cs]
+ ppCase (TableRow p t) = (ppP p,ppTv (patVars p++vs) t)
+{-
+ ppPredef n =
+ case predef n of
+ Ok BIND -> single (c "BIND")
+ Ok SOFT_BIND -> single (c "SOFT_BIND")
+ Ok SOFT_SPACE -> single (c "SOFT_SPACE")
+ Ok CAPIT -> single (c "CAPIT")
+ Ok ALL_CAPIT -> single (c "ALL_CAPIT")
+ _ -> Var n
+-}
+ ppP p =
+ case p of
+ ParamPattern (Param c ps) -> ConP (gId c) (map ppP ps)
+ RecordPattern r -> ConP (rcon' ls) (map ppP ps)
+ where (ls,ps) = unzip $ sortOn fst [(l,p)|RecordRow l p<-r]
+ WildPattern -> WildP
+
+ token s = single (c "TK" `Ap` lit s)
+
+ alts t' vs = single (c "TP" `Ap` List (map alt vs) `Ap` ppT0 t')
+ where
+ alt (s,t) = Pair (List (pre s)) (ppT0 t)
+ pre s = map lit s
+
+ c = Const
+ lit s = c (show s) -- hmm
+ concat = if va then concat' else plusplus
+ where
+ concat' (List [List ts1]) (List [List ts2]) = List [List (ts1++ts2)]
+ concat' t1 t2 = Op t1 "+++" t2
+
+ pure' = single -- forcing the list monad
+
+ select = if va then select' else Ap
+ select' (List [t]) (List [p]) = Op t "!" p
+ select' (List [t]) p = Op t "!$" p
+ select' t p = Op t "!*" p
+
+ ap = if va then ap' else Ap
+ where
+ ap' (List [f]) x = fmap f x
+ ap' f x = Op f "<*>" x
+ fmap f (List [x]) = pure' (Ap f x)
+ fmap f x = Op f "<$>" x
+
+ -- join = if va then join' else id
+ join' (List [x]) = x
+ join' x = c "concat" `Ap` x
+
+ empty = if va then List [] else c "error" `Ap` c (show "empty variant")
+ variants = if va then \ ts -> join' (List (map ppT ts))
+ else \ (t:_) -> ppT t
+
+ aps f [] = f
+ aps f (a:as) = aps (ap f a) as
+
+ dedup ts =
+ if M.null dups
+ then ([],map ppT ts)
+ else ([(ev i,ppT t)|(i,t)<-defs],zipWith entry ts is)
+ where
+ entry t i = maybe (ppT t) (Var . ev) (M.lookup i dups)
+ ev i = identS ("e'"++show i)
+
+ defs = [(i1,t)|(t,i1:_:_)<-ms]
+ dups = M.fromList [(i2,i1)|(_,i1:is@(_:_))<-ms,i2<-i1:is]
+ ms = M.toList m
+ m = fmap sort (M.fromListWith (++) (zip ts [[i]|i<-is]))
+ is = [0..]::[Int]
-con = Cn . identS
+--con = Cn . identS
-tableTypes gr ts = S.unions (map tabtys ts)
- where
- tabtys t =
- case t of
- V t cc -> S.union (paramTypes gr t) (tableTypes gr cc)
- T (TTyped t) cs -> S.union (paramTypes gr t) (tableTypes gr (map snd cs))
- _ -> collectOp tabtys t
+class Records t where
+ records :: t -> S.Set [LabelId]
-paramTypes gr t =
- case t of
- RecType fs -> S.unions (map (paramTypes gr.snd) fs)
- Table t1 t2 -> S.union (paramTypes gr t1) (paramTypes gr t2)
- App tf ta -> S.union (paramTypes gr tf) (paramTypes gr ta)
- Sort _ -> S.empty
- EInt _ -> S.empty
- Q q -> lookup q
- QC q -> lookup q
- FV ts -> S.unions (map (paramTypes gr) ts)
- _ -> ignore
- where
- lookup q = case lookupOrigInfo gr q of
- Ok (_,ResOper _ (Just (L _ t))) ->
- S.insert q (paramTypes gr t)
- Ok (_,ResParam {}) -> S.singleton q
- _ -> ignore
+instance Records t => Records [t] where
+ records = S.unions . map records
- ignore = trace ("Ignore: "++show t) S.empty
-
-
-
-records ts = S.unions (map recs ts)
- where
- recs t =
- case t of
- R r -> S.insert (labels r) (records (map (snd.snd) r))
- RecType r -> S.insert (labels r) (records (map snd r))
- _ -> collectOp recs t
-
- labels = sort . filter (not . isLockLabel) . map fst
+instance (Records t1,Records t2) => Records (t1,t2) where
+ records (t1,t2) = S.union (records t1) (records t2)
+
+instance Records LincatDef where
+ records (LincatDef _ lt) = records lt
+
+instance Records LinDef where
+ records (LinDef _ _ lv) = records lv
+
+instance Records LinType where
+ records t =
+ case t of
+ RecordType r -> rowRecords r
+ TableType pt lt -> records (pt,lt)
+ TupleType ts -> records ts
+ _ -> S.empty
+
+rowRecords r = S.insert (sort ls) (records ts)
+ where (ls,ts) = unzip [(l,t)|RecordRow l t<-r]
+
+instance Records LinValue where
+ records v =
+ case v of
+ ConcatValue v1 v2 -> records (v1,v2)
+ ParamConstant (Param c vs) -> records vs
+ RecordValue r -> rowRecords r
+ TableValue t r -> records (t,r)
+ TupleValue vs -> records vs
+ VariantValue vs -> records vs
+ PreValue alts d -> records (map snd alts,d)
+ Projection v l -> records v
+ Selection v1 v2 -> records (v1,v2)
+ _ -> S.empty
+
+instance Records rhs => Records (TableRow rhs) where
+ records (TableRow _ v) = records v
+-- | Record subtyping is converted into explicit coercions in Haskell
coerce env ty t =
case (ty,t) of
- (_,Let d t) -> Let d (coerce (extend env d) ty t)
- (_,FV ts) -> FV (map (coerce env ty) ts)
- (Table ti tv,V _ ts) -> V ti (map (coerce env tv) ts)
- (Table ti tv,T (TTyped _) cs) -> T (TTyped ti) (mapSnd (coerce env tv) cs)
- (RecType rt,R r) ->
- R [(l,(Just ft,coerce env ft f))|(l,(_,f))<-r,Just ft<-[lookup l rt]]
- (RecType rt,Vr x)->
+ (_,VariantValue ts) -> VariantValue (map (coerce env ty) ts)
+ (TableType ti tv,TableValue _ cs) ->
+ TableValue ti [TableRow p (coerce env tv t)|TableRow p t<-cs]
+ (RecordType rt,RecordValue r) ->
+ RecordValue [RecordRow l (coerce env ft f) |
+ RecordRow l f<-r,ft<-[ft|RecordRow l' ft<-rt,l'==l]]
+ (RecordType rt,VarValue x)->
case lookup x env of
Just ty' | ty'/=ty -> -- better to compare to normal form of ty'
- --trace ("coerce "++render ty'++" to "++render ty) $
- App (to_rcon (map fst rt)) t
- _ -> trace ("no coerce to "++render ty) t
+ --trace ("coerce "++render ty'++" to "++render ty) $
+ app (to_rcon rt) [t]
+ | otherwise -> t -- types match, no coercion needed
+ _ -> trace (render ("missing type to coerce"<+>x<+>"to"<+>render ty
+ $$ "in" <+> map fst env))
+ t
_ -> t
where
- extend env (x,(Just ty,rhs)) = (x,ty):env
- extend env _ = env
+ app f ts = ParamConstant (Param f ts) -- !! a hack
+ to_rcon = ParamId . Unqual . to_rcon' . labels
-convert va gId gr = convert' va gId [] gr
+patVars p = []
-convert' va gId vs gr = ppT
- where
- ppT0 = convert' False gId vs gr
- ppTv vs' = convert' va gId vs' gr
+labels r = [l|RecordRow l _<-r]
- ppT t =
- case t of
- -- Only for 'let' inserted on the top-level by this converter:
- Let (x,(_,xt)) t -> let1 x (ppT0 xt) (ppT t)
--- Abs b x t -> ...
- V ty ts -> pure (c "table" `Ap` dedup ts)
- T (TTyped ty) cs -> pure (LambdaCase (map ppCase cs))
- S t p -> select (ppT t) (ppT p)
- C t1 t2 -> concat (ppT t1) (ppT t2)
- App f a -> ap (ppT f) (ppT a)
- R r -> aps (ppT (rcon (map fst r))) (fields r)
- P t l -> ap (ppT (proj l)) (ppT t)
- FV [] -> empty
- Vr x -> if x `elem` vs then pure (Var x) else Var x
- Cn x -> pure (Var x)
- Con c -> pure (Var (gId c))
- Sort k -> pure (Var k)
- EInt n -> pure (lit n)
- Q (m,n) -> if m==cPredef then pure (ppPredef n) else Var (qual m n)
- QC (m,n) -> pure (Var (gId (qual m n)))
- K s -> pure (token s)
- Empty -> pure (List [])
- FV ts@(_:_) -> variants ts
- Alts t' vs -> pure (alts t' vs)
-
- ppCase (p,t) = (ppP p,ppTv (patVars p++vs) t)
-
- ppPredef n =
- case predef n of
- Ok BIND -> single (c "BIND")
- Ok SOFT_BIND -> single (c "SOFT_BIND")
- Ok SOFT_SPACE -> single (c "SOFT_SPACE")
- Ok CAPIT -> single (c "CAPIT")
- Ok ALL_CAPIT -> single (c "ALL_CAPIT")
- _ -> Var n
-
- ppP p =
- case p of
- PC c ps -> ConP (gId c) (map ppP ps)
- PP (_,c) ps -> ConP (gId c) (map ppP ps)
- PR r -> ConP (rcon' (map fst r)) (map (ppP.snd) (filter (not.isLockLabel.fst) r))
- PW -> WildP
- PV x -> VarP x
- PString s -> Lit (show s) -- !!
- PInt i -> Lit (show i)
- PFloat x -> Lit (show x)
- PT _ p -> ppP p
- PAs x p -> AsP x (ppP p)
-
- token s = single (c "TK" `Ap` lit s)
-
- alts t' vs = single (c "TP" `Ap` List (map alt vs) `Ap` ppT0 t')
- where
- alt (t,p) = Pair (List (pre p)) (ppT0 t)
-
- pre (K s) = [lit s]
- pre (Strs ts) = concatMap pre ts
- pre (EPatt p) = pat p
- pre t = error $ "pre "++show t
-
- pat (PString s) = [lit s]
- pat (PAlt p1 p2) = pat p1++pat p2
- pat p = error $ "pat "++show p
-
- fields = map (ppT.snd.snd) . sort . filter (not.isLockLabel.fst)
-
- c = Const
- lit s = c (show s) -- hmm
- concat = if va then concat' else plusplus
- where
- concat' (List [List ts1]) (List [List ts2]) = List [List (ts1++ts2)]
- concat' t1 t2 = Op t1 "+++" t2
- pure = if va then single else id
- pure' = single -- forcing the list monad
-
- select = if va then select' else Ap
- select' (List [t]) (List [p]) = Op t "!" p
- select' (List [t]) p = Op t "!$" p
- select' t p = Op t "!*" p
-
- ap = if va then ap' else Ap
- where
- ap' (List [f]) x = fmap f x
- ap' f x = Op f "<*>" x
- fmap f (List [x]) = pure' (Ap f x)
- fmap f x = Op f "<$>" x
-
--- join = if va then join' else id
- join' (List [x]) = x
- join' x = c "concat" `Ap` x
-
- empty = if va then List [] else c "error" `Ap` c (show "empty variant")
- variants = if va then \ ts -> join' (List (map ppT ts))
- else \ (t:_) -> ppT t
-
- aps f [] = f
- aps f (a:as) = aps (ap f a) as
-
- dedup ts =
- if M.null dups
- then List (map ppT ts)
- else Lets [(ev i,ppT t)|(i,t)<-defs] (List (zipWith entry ts is))
- where
- entry t i = maybe (ppT t) (Var . ev) (M.lookup i dups)
- ev i = identS ("e'"++show i)
-
- defs = [(i1,t)|(t,i1:_:_)<-ms]
- dups = M.fromList [(i2,i1)|(_,i1:is@(_:_))<-ms,i2<-i1:is]
- ms = M.toList m
- m = fmap sort (M.fromListWith (++) (zip ts [[i]|i<-is]))
- is = [0..]::[Int]
-
-patVars p =
- case p of
- PV x -> [x]
- PAs x p -> x:patVars p
- _ -> collectPattOp patVars p
-
-convType va gId = ppT
- where
- ppT t =
- case t of
- Table ti tv -> Fun (ppT ti) (if va then ListT (ppT tv) else ppT tv)
- RecType rt -> tcon (rcon' (map fst rt)) (fields rt)
- App tf ta -> TAp (ppT tf) (ppT ta)
- FV [] -> tcon0 (identS "({-empty variant-})")
- Sort k -> tcon0 k
- EInt n -> tcon0 (identS ("({-"++show n++"-})")) -- type level numeric literal
- FV (t:ts) -> ppT t -- !!
- QC (m,n) -> tcon0 (gId (qual m n))
- Q (m,n) -> tcon0 (gId (qual m n))
- _ -> error $ "Missing case in convType for: "++show t
-
- fields = map (ppT.snd) . sort . filter (not.isLockLabel.fst)
-
-proj = con . proj'
-proj' l = "proj_"++render l
-rcon = con . rcon_name
+proj = Var . identS . proj'
+proj' (LabelId l) = "proj_"++l
+rcon = Var . rcon'
rcon' = identS . rcon_name
-rcon_name ls = "R"++concat (sort ['_':render l|l<-ls,not (isLockLabel l)])
-to_rcon = con . to_rcon'
+rcon_name ls = "R"++concat (sort ['_':l|LabelId l<-ls])
+
to_rcon' = ("to_"++) . rcon_name
recordType ls =
@@ -400,31 +386,6 @@ labelClass l =
r = identS "r"
a = identS "a"
-paramType va gId gr q@(_,n) =
- case lookupOrigInfo gr q of
- Ok (m,ResParam (Just (L _ ps)) _)
- {- - | m/=cPredef && m/=moduleNameS "Prelude"-} ->
- ((S.singleton (m,n),argTypes ps),
- [Data (conap0 name) (map (param m) ps)["Eq","Ord","Show"],
- Instance [] (TId (identS "EnumAll") `TAp` TId name)
- [(lhs0 "enumAll",foldr1 plusplus (map (enumParam m) ps))]]
- )
- where name = gId (qual m n)
- Ok (m,ResOper _ (Just (L _ t)))
- | m==cPredef && n==cInts ->
- ((S.singleton (m,n),S.empty),
- [Type (ConAp (gId (qual m n)) [identS "n"]) (TId (identS "Int"))])
- | otherwise ->
- ((S.singleton (m,n),paramTypes gr t),
- [Type (conap0 (gId (qual m n))) (convType va gId t)])
- _ -> ((S.empty,S.empty),[])
- where
- param m (n,ctx) = ConAp (gId (qual m n)) [convType va gId t|(_,_,t)<-ctx]
- argTypes = S.unions . map argTypes1
- argTypes1 (n,ctx) = S.unions [paramTypes gr t|(_,_,t)<-ctx]
-
- enumParam m (n,ctx) = enumCon (gId (qual m n)) (length ctx)
-
enumCon name arity =
if arity==0
then single (Var name)
@@ -433,5 +394,23 @@ enumCon name arity =
ap (List [f]) a = Op f "<$>" a
ap f a = Op f "<*>" a
-qual :: ModuleName -> Ident -> Ident
-qual m = prefixIdent (render m++"_")
+lincatName,linfunName :: CatId -> Ident
+lincatName c = prefixIdent "Lin" (toIdent c)
+linfunName c = prefixIdent "lin" (toIdent c)
+
+class ToIdent i where toIdent :: i -> Ident
+
+instance ToIdent ParamId where toIdent (ParamId q) = qIdentS q
+instance ToIdent PredefId where toIdent (PredefId s) = identS s
+instance ToIdent CatId where toIdent (CatId s) = identS s
+instance ToIdent C.FunId where toIdent (FunId s) = identS s
+instance ToIdent VarValueId where toIdent (VarValueId q) = qIdentS q
+
+qIdentS = identS . unqual
+
+unqual (Qual (ModId m) n) = m++"_"++n
+unqual (Unqual n) = n
+
+instance ToIdent VarId where
+ toIdent Anonymous = identW
+ toIdent (VarId s) = identS s
diff --git a/src/compiler/GF/Compile/Export.hs b/src/compiler/GF/Compile/Export.hs
index 4de408db4..eee9a72e8 100644
--- a/src/compiler/GF/Compile/Export.hs
+++ b/src/compiler/GF/Compile/Export.hs
@@ -1,11 +1,10 @@
module GF.Compile.Export where
-import PGF
+import PGF2
import GF.Compile.PGFtoHaskell
+--import GF.Compile.PGFtoAbstract
import GF.Compile.PGFtoJava
-import GF.Compile.PGFtoProlog
-import GF.Compile.PGFtoJS
-import GF.Compile.PGFtoPython
+import GF.Compile.PGFtoJSON
import GF.Infra.Option
--import GF.Speech.CFG
import GF.Speech.PGFToCFG
@@ -19,6 +18,7 @@ import GF.Speech.SLF
import GF.Speech.PrRegExp
import Data.Maybe
+import qualified Data.Map as Map
import System.FilePath
import GF.Text.Pretty
@@ -33,11 +33,11 @@ exportPGF :: Options
exportPGF opts fmt pgf =
case fmt of
FmtPGFPretty -> multi "txt" (showPGF)
- FmtJavaScript -> multi "js" pgf2js
- FmtPython -> multi "py" pgf2python
+ FmtCanonicalGF -> [] -- canon "gf" (render80 . abstract2canonical)
+ FmtCanonicalJson-> []
+ FmtJSON -> multi "json" pgf2json
FmtHaskell -> multi "hs" (grammar2haskell opts name)
FmtJava -> multi "java" (grammar2java opts name)
- FmtProlog -> multi "pl" grammar2prolog
FmtBNF -> single "bnf" bnfPrinter
FmtEBNF -> single "ebnf" (ebnfPrinter opts)
FmtSRGS_XML -> single "grxml" (srgsXmlPrinter opts)
@@ -51,17 +51,13 @@ exportPGF opts fmt pgf =
FmtRegExp -> single "rexp" regexpPrinter
FmtFA -> single "dot" slfGraphvizPrinter
where
- name = fromMaybe (showCId (abstractName pgf)) (flag optName opts)
+ name = fromMaybe (abstractName pgf) (flag optName opts)
multi :: String -> (PGF -> String) -> [(FilePath,String)]
multi ext pr = [(name <.> ext, pr pgf)]
- single :: String -> (PGF -> CId -> String) -> [(FilePath,String)]
- single ext pr = [(showCId cnc <.> ext, pr pgf cnc) | cnc <- languages pgf]
+-- canon ext pr = [("canonical">name<.>ext,pr pgf)]
+
+ single :: String -> (PGF -> Concr -> String) -> [(FilePath,String)]
+ single ext pr = [(concreteName cnc <.> ext, pr pgf cnc) | cnc <- Map.elems (languages pgf)]
--- | Get the name of the concrete syntax to generate output from.
--- FIXME: there should be an option to change this.
-outputConcr :: PGF -> CId
-outputConcr pgf = case languages pgf of
- [] -> error "No concrete syntax."
- cnc:_ -> cnc
diff --git a/src/compiler/GF/Compile/GrammarToCanonical.hs b/src/compiler/GF/Compile/GrammarToCanonical.hs
new file mode 100644
index 000000000..33f35ad08
--- /dev/null
+++ b/src/compiler/GF/Compile/GrammarToCanonical.hs
@@ -0,0 +1,389 @@
+-- | Translate grammars to Canonical form
+-- (a common intermediate representation to simplify export to other formats)
+module GF.Compile.GrammarToCanonical(
+ grammar2canonical,abstract2canonical,concretes2canonical,
+ projection,selection
+ ) where
+import Data.List(nub,partition)
+import qualified Data.Map as M
+import qualified Data.Set as S
+import GF.Data.ErrM
+import GF.Text.Pretty
+import GF.Grammar.Grammar
+import GF.Grammar.Lookup(lookupOrigInfo,allOrigInfos,allParamValues)
+import GF.Grammar.Macros(typeForm,collectOp,collectPattOp,mkAbs,mkApp,term2patt)
+import GF.Grammar.Lockfield(isLockLabel)
+import GF.Grammar.Predef(cPredef,cInts)
+import GF.Compile.Compute.Predef(predef)
+import GF.Compile.Compute.Value(Predefined(..))
+import GF.Infra.Ident(ModuleName(..),Ident,prefixIdent,showIdent,isWildIdent)
+import GF.Infra.Option(optionsPGF)
+import PGF.Internal(Literal(..))
+import GF.Compile.Compute.ConcreteNew(normalForm,resourceValues)
+import GF.Grammar.Canonical as C
+import Debug.Trace
+
+-- | Generate Canonical code for the named abstract syntax and all associated
+-- concrete syntaxes
+grammar2canonical opts absname gr =
+ Grammar (abstract2canonical absname gr)
+ (map snd (concretes2canonical opts absname gr))
+
+-- | Generate Canonical code for the named abstract syntax
+abstract2canonical absname gr =
+ Abstract (modId absname) (convFlags gr absname) cats funs
+ where
+ cats = [CatDef (gId c) (convCtx ctx) | ((_,c),AbsCat ctx) <- adefs]
+
+ funs = [FunDef (gId f) (convType ty) |
+ ((_,f),AbsFun (Just (L _ ty)) ma mdef _) <- adefs]
+
+ adefs = allOrigInfos gr absname
+
+ convCtx = maybe [] (map convHypo . unLoc)
+ convHypo (bt,name,t) =
+ case typeForm t of
+ ([],(_,cat),[]) -> gId cat -- !!
+
+ convType t =
+ case typeForm t of
+ (hyps,(_,cat),args) -> Type bs (TypeApp (gId cat) as)
+ where
+ bs = map convHypo' hyps
+ as = map convType args
+
+ convHypo' (bt,name,t) = TypeBinding (gId name) (convType t)
+
+
+-- | Generate Canonical code for all concrete syntaxes associated with
+-- the named abstract syntax in the given grammar.
+concretes2canonical opts absname gr =
+ [(cncname,concrete2canonical gr cenv absname cnc cncmod)
+ | let cenv = resourceValues opts gr,
+ cnc<-allConcretes gr absname,
+ let cncname = "canonical/"++render cnc ++ ".gf" :: FilePath
+ Ok cncmod = lookupModule gr cnc
+ ]
+
+-- | Generate Canonical GF for the given concrete module.
+concrete2canonical gr cenv absname cnc modinfo =
+ Concrete (modId cnc) (modId absname) (convFlags gr cnc)
+ (neededParamTypes S.empty (params defs))
+ [lincat|(_,Left lincat)<-defs]
+ [lin|(_,Right lin)<-defs]
+ where
+ defs = concatMap (toCanonical gr absname cenv) .
+ M.toList $
+ jments modinfo
+
+ params = S.toList . S.unions . map fst
+
+ neededParamTypes have [] = []
+ neededParamTypes have (q:qs) =
+ if q `S.member` have
+ then neededParamTypes have qs
+ else let ((got,need),def) = paramType gr q
+ in def++neededParamTypes (S.union got have) (S.toList need++qs)
+
+toCanonical gr absname cenv (name,jment) =
+ case jment of
+ CncCat (Just (L loc typ)) _ _ pprn _ ->
+ [(pts,Left (LincatDef (gId name) (convType ntyp)))]
+ where
+ pts = paramTypes gr ntyp
+ ntyp = nf loc typ
+ CncFun (Just r@(cat,ctx,lincat)) (Just (L loc def)) pprn _ ->
+ [(tts,Right (LinDef (gId name) (map gId args) (convert gr e')))]
+ where
+ tts = tableTypes gr [e']
+
+ e' = unAbs (length params) $
+ nf loc (mkAbs params (mkApp def (map Vr args)))
+ params = [(b,x)|(b,x,_)<-ctx]
+ args = map snd params
+
+ AnyInd _ m -> case lookupOrigInfo gr (m,name) of
+ Ok (m,jment) -> toCanonical gr absname cenv (name,jment)
+ _ -> []
+ _ -> []
+ where
+ nf loc = normalForm cenv (L loc name)
+-- aId n = prefixIdent "A." (gId n)
+
+ unAbs 0 t = t
+ unAbs n (Abs _ _ t) = unAbs (n-1) t
+ unAbs _ t = t
+
+tableTypes gr ts = S.unions (map tabtys ts)
+ where
+ tabtys t =
+ case t of
+ V t cc -> S.union (paramTypes gr t) (tableTypes gr cc)
+ T (TTyped t) cs -> S.union (paramTypes gr t) (tableTypes gr (map snd cs))
+ _ -> collectOp tabtys t
+
+paramTypes gr t =
+ case t of
+ RecType fs -> S.unions (map (paramTypes gr.snd) fs)
+ Table t1 t2 -> S.union (paramTypes gr t1) (paramTypes gr t2)
+ App tf ta -> S.union (paramTypes gr tf) (paramTypes gr ta)
+ Sort _ -> S.empty
+ EInt _ -> S.empty
+ Q q -> lookup q
+ QC q -> lookup q
+ FV ts -> S.unions (map (paramTypes gr) ts)
+ _ -> ignore
+ where
+ lookup q = case lookupOrigInfo gr q of
+ Ok (_,ResOper _ (Just (L _ t))) ->
+ S.insert q (paramTypes gr t)
+ Ok (_,ResParam {}) -> S.singleton q
+ _ -> ignore
+
+ ignore = trace ("Ignore: "++show t) S.empty
+
+
+convert gr = convert' gr []
+
+convert' gr vs = ppT
+ where
+ ppT0 = convert' gr vs
+ ppTv vs' = convert' gr vs'
+
+ ppT t =
+ case t of
+-- Abs b x t -> ...
+-- V ty ts -> VTableValue (convType ty) (map ppT ts)
+ V ty ts -> TableValue (convType ty) [TableRow (ppP p) (ppT t)|(p,t)<-zip ps ts]
+ where
+ Ok pts = allParamValues gr ty
+ Ok ps = mapM term2patt pts
+ T (TTyped ty) cs -> TableValue (convType ty) (map ppCase cs)
+ S t p -> selection (ppT t) (ppT p)
+ C t1 t2 -> concatValue (ppT t1) (ppT t2)
+ App f a -> ap (ppT f) (ppT a)
+ R r -> RecordValue (fields r)
+ P t l -> projection (ppT t) (lblId l)
+ Vr x -> VarValue (gId x)
+ Cn x -> VarValue (gId x) -- hmm
+ Con c -> ParamConstant (Param (gId c) [])
+ Sort k -> VarValue (gId k)
+ EInt n -> LiteralValue (IntConstant n)
+ Q (m,n) -> if m==cPredef then ppPredef n else VarValue ((gQId m n))
+ QC (m,n) -> ParamConstant (Param ((gQId m n)) [])
+ K s -> LiteralValue (StrConstant s)
+ Empty -> LiteralValue (StrConstant "")
+ FV ts -> VariantValue (map ppT ts)
+ Alts t' vs -> alts vs (ppT t')
+ _ -> error $ "convert' "++show t
+
+ ppCase (p,t) = TableRow (ppP p) (ppTv (patVars p++vs) t)
+
+ ppPredef n =
+ case predef n of
+ Ok BIND -> p "BIND"
+ Ok SOFT_BIND -> p "SOFT_BIND"
+ Ok SOFT_SPACE -> p "SOFT_SPACE"
+ Ok CAPIT -> p "CAPIT"
+ Ok ALL_CAPIT -> p "ALL_CAPIT"
+ _ -> VarValue (gQId cPredef n) -- hmm
+ where
+ p = PredefValue . PredefId
+
+ ppP p =
+ case p of
+ PC c ps -> ParamPattern (Param (gId c) (map ppP ps))
+ PP (m,c) ps -> ParamPattern (Param ((gQId m c)) (map ppP ps))
+ PR r -> RecordPattern (fields r) {-
+ PW -> WildPattern
+ PV x -> VarP x
+ PString s -> Lit (show s) -- !!
+ PInt i -> Lit (show i)
+ PFloat x -> Lit (show x)
+ PT _ p -> ppP p
+ PAs x p -> AsP x (ppP p) -}
+ where
+ fields = map field . filter (not.isLockLabel.fst)
+ field (l,p) = RecordRow (lblId l) (ppP p)
+
+-- patToParam p = case ppP p of ParamPattern pv -> pv
+
+-- token s = single (c "TK" `Ap` lit s)
+
+ alts vs = PreValue (map alt vs)
+ where
+ alt (t,p) = (pre p,ppT0 t)
+
+ pre (K s) = [s]
+ pre Empty = [""] -- Empty == K ""
+ pre (Strs ts) = concatMap pre ts
+ pre (EPatt p) = pat p
+ pre t = error $ "pre "++show t
+
+ pat (PString s) = [s]
+ pat (PAlt p1 p2) = pat p1++pat p2
+ pat (PSeq p1 p2) = [s1++s2 | s1<-pat p1, s2<-pat p2]
+ pat p = error $ "pat "++show p
+
+ fields = map field . filter (not.isLockLabel.fst)
+ field (l,(_,t)) = RecordRow (lblId l) (ppT t)
+ --c = Const
+ --c = VarValue . VarValueId
+ --lit s = c (show s) -- hmm
+
+ ap f a = case f of
+ ParamConstant (Param p ps) ->
+ ParamConstant (Param p (ps++[a]))
+ _ -> error $ "convert' ap: "++render (ppA f <+> ppA a)
+
+concatValue v1 v2 =
+ case (v1,v2) of
+ (LiteralValue (StrConstant ""),_) -> v2
+ (_,LiteralValue (StrConstant "")) -> v1
+ _ -> ConcatValue v1 v2
+
+-- | Smart constructor for projections
+projection r l = maybe (Projection r l) id (proj r l)
+
+proj r l =
+ case r of
+ RecordValue r -> case [v|RecordRow l' v<-r,l'==l] of
+ [v] -> Just v
+ _ -> Nothing
+ _ -> Nothing
+
+-- | Smart constructor for selections
+selection t v =
+ -- Note: impossible cases can become possible after grammar transformation
+ case t of
+ TableValue tt r ->
+ case nub [rv|TableRow _ rv<-keep] of
+ [rv] -> rv
+ _ -> Selection (TableValue tt r') v
+ where
+ -- Don't introduce wildcard patterns, true to the canonical format,
+ -- annotate (or eliminate) rhs in impossible rows
+ r' = map trunc r
+ trunc r@(TableRow p e) = if mightMatchRow v r
+ then r
+ else TableRow p (impossible e)
+ {-
+ -- Creates smaller tables, but introduces wildcard patterns
+ r' = if null discard
+ then r
+ else keep++[TableRow WildPattern impossible]
+ -}
+ (keep,discard) = partition (mightMatchRow v) r
+ _ -> Selection t v
+
+impossible = CommentedValue "impossible"
+
+mightMatchRow v (TableRow p _) =
+ case p of
+ WildPattern -> True
+ _ -> mightMatch v p
+
+mightMatch v p =
+ case v of
+ ConcatValue _ _ -> False
+ ParamConstant (Param c1 pvs) ->
+ case p of
+ ParamPattern (Param c2 pps) -> c1==c2 && length pvs==length pps &&
+ and [mightMatch v p|(v,p)<-zip pvs pps]
+ _ -> False
+ RecordValue rv ->
+ case p of
+ RecordPattern rp ->
+ and [maybe False (flip mightMatch p) (proj v l) | RecordRow l p<-rp]
+ _ -> False
+ _ -> True
+
+patVars p =
+ case p of
+ PV x -> [x]
+ PAs x p -> x:patVars p
+ _ -> collectPattOp patVars p
+
+convType = ppT
+ where
+ ppT t =
+ case t of
+ Table ti tv -> TableType (ppT ti) (ppT tv)
+ RecType rt -> RecordType (convFields rt)
+-- App tf ta -> TAp (ppT tf) (ppT ta)
+-- FV [] -> tcon0 (identS "({-empty variant-})")
+ Sort k -> convSort k
+-- EInt n -> tcon0 (identS ("({-"++show n++"-})")) -- type level numeric literal
+ FV (t:ts) -> ppT t -- !!
+ QC (m,n) -> ParamType (ParamTypeId ((gQId m n)))
+ Q (m,n) -> ParamType (ParamTypeId ((gQId m n)))
+ _ -> error $ "Missing case in convType for: "++show t
+
+ convFields = map convField . filter (not.isLockLabel.fst)
+ convField (l,r) = RecordRow (lblId l) (ppT r)
+
+ convSort k = case showIdent k of
+ "Float" -> FloatType
+ "Int" -> IntType
+ "Str" -> StrType
+ _ -> error ("convSort "++show k)
+
+toParamType t = case convType t of
+ ParamType pt -> pt
+ _ -> error ("toParamType "++show t)
+
+toParamId t = case toParamType t of
+ ParamTypeId p -> p
+
+paramType gr q@(_,n) =
+ case lookupOrigInfo gr q of
+ Ok (m,ResParam (Just (L _ ps)) _)
+ {- - | m/=cPredef && m/=moduleNameS "Prelude"-} ->
+ ((S.singleton (m,n),argTypes ps),
+ [ParamDef name (map (param m) ps)]
+ )
+ where name = (gQId m n)
+ Ok (m,ResOper _ (Just (L _ t)))
+ | m==cPredef && n==cInts ->
+ ((S.empty,S.empty),[]) {-
+ ((S.singleton (m,n),S.empty),
+ [Type (ConAp ((gQId m n)) [identS "n"]) (TId (identS "Int"))])-}
+ | otherwise ->
+ ((S.singleton (m,n),paramTypes gr t),
+ [ParamAliasDef ((gQId m n)) (convType t)])
+ _ -> ((S.empty,S.empty),[])
+ where
+ param m (n,ctx) = Param ((gQId m n)) [toParamId t|(_,_,t)<-ctx]
+ argTypes = S.unions . map argTypes1
+ argTypes1 (n,ctx) = S.unions [paramTypes gr t|(_,_,t)<-ctx]
+
+lblId = LabelId . render -- hmm
+modId (MN m) = ModId (showIdent m)
+
+class FromIdent i where gId :: Ident -> i
+
+instance FromIdent VarId where
+ gId i = if isWildIdent i then Anonymous else VarId (showIdent i)
+
+instance FromIdent C.FunId where gId = C.FunId . showIdent
+instance FromIdent CatId where gId = CatId . showIdent
+instance FromIdent ParamId where gId = ParamId . unqual
+instance FromIdent VarValueId where gId = VarValueId . unqual
+
+class FromIdent i => QualIdent i where gQId :: ModuleName -> Ident -> i
+
+instance QualIdent ParamId where gQId m n = ParamId (qual m n)
+instance QualIdent VarValueId where gQId m n = VarValueId (qual m n)
+
+qual m n = Qual (modId m) (showIdent n)
+unqual n = Unqual (showIdent n)
+
+convFlags gr mn =
+ Flags [(n,convLit v) |
+ (n,v)<-err (const []) (optionsPGF.mflags) (lookupModule gr mn)]
+ where
+ convLit l =
+ case l of
+ LStr s -> Str s
+ LInt i -> C.Int i
+ LFlt d -> Flt d
diff --git a/src/compiler/GF/Compile/GrammarToPGF.hs b/src/compiler/GF/Compile/GrammarToPGF.hs
index 70a3047b1..ee8bc18dc 100644
--- a/src/compiler/GF/Compile/GrammarToPGF.hs
+++ b/src/compiler/GF/Compile/GrammarToPGF.hs
@@ -1,12 +1,12 @@
-{-# LANGUAGE ImplicitParams, BangPatterns, FlexibleContexts #-}
+{-# LANGUAGE ImplicitParams, BangPatterns, FlexibleContexts, MagicHash #-}
module GF.Compile.GrammarToPGF (grammar2PGF) where
import GF.Compile.GeneratePMCFG
import GF.Compile.GenerateBC
import GF.Compile.OptimizePGF
-import PGF(CId,mkCId,Type,Hypo,Expr)
-import PGF.Internal
+import PGF2 hiding (mkType)
+import PGF2.Internal
import GF.Grammar.Predef
import GF.Grammar.Grammar hiding (Production)
import qualified GF.Grammar.Lookup as Look
@@ -19,18 +19,22 @@ import GF.Infra.UseIO (IOE)
import GF.Data.Operations
import Data.List
+import Data.Char
import qualified Data.Set as Set
import qualified Data.Map as Map
import qualified Data.IntMap as IntMap
import Data.Array.IArray
import Data.Maybe(fromMaybe)
-grammar2PGF :: Options -> SourceGrammar -> ModuleName -> Map.Map CId Double -> IO PGF
+import GHC.Prim
+import GHC.Base(getTag)
+
+grammar2PGF :: Options -> SourceGrammar -> ModuleName -> Map.Map PGF2.Fun Double -> IO PGF
grammar2PGF opts gr am probs = do
cnc_infos <- getConcreteInfos gr am
return $
build (let gflags = if flag optSplitPGF opts
- then [(mkCId "split", LStr "true")]
+ then [("split", LStr "true")]
else []
(an,abs) = mkAbstr am probs
cncs = map (mkConcr opts abs) cnc_infos
@@ -39,21 +43,21 @@ grammar2PGF opts gr am probs = do
cenv = resourceValues opts gr
aflags = err (const noOptions) mflags (lookupModule gr am)
- mkAbstr :: (?builder :: Builder s) => ModuleName -> Map.Map CId Double -> (CId, B s AbstrInfo)
+ mkAbstr :: (?builder :: Builder s) => ModuleName -> Map.Map PGF2.Fun Double -> (AbsName, B s AbstrInfo)
mkAbstr am probs = (mi2i am, newAbstr flags cats funs)
where
adefs =
[((cPredefAbs,c), AbsCat (Just (L NoLoc []))) | c <- [cFloat,cInt,cString]] ++
Look.allOrigInfos gr am
- flags = [(mkCId f,x) | (f,x) <- optionsPGF aflags]
+ flags = optionsPGF aflags
toLogProb = realToFrac . negate . log
cats = [(c', snd (mkContext [] cont), toLogProb (fromMaybe 0 (Map.lookup c' probs))) |
((m,c),AbsCat (Just (L _ cont))) <- adefs, let c' = i2i c]
- funs = [(f', mkType [] ty, arity, {-mkDef gr arity mdef,-} toLogProb (fromMaybe 0 (Map.lookup f' funs_probs))) |
+ funs = [(f', mkType [] ty, arity, toLogProb (fromMaybe 0 (Map.lookup f' funs_probs))) |
((m,f),AbsFun (Just (L _ ty)) ma mdef _) <- adefs,
let arity = mkArity ma mdef ty,
let f' = i2i f]
@@ -72,7 +76,10 @@ grammar2PGF opts gr am probs = do
mkConcr opts abs (cm,ex_seqs,cdefs) =
let cflags = err (const noOptions) mflags (lookupModule gr cm)
- flags = [(mkCId f,x) | (f,x) <- optionsPGF cflags]
+ ciCmp | flag optCaseSensitive cflags = compare
+ | otherwise = compareCaseInsensitive
+
+ flags = optionsPGF aflags
seqs = (mkSetArray . Set.fromList . concat) $
(elems (ex_seqs :: Array SeqId [Symbol]) : [maybe [] elems (mseqs mi) | (m,mi) <- allExtends gr cm])
@@ -80,11 +87,11 @@ grammar2PGF opts gr am probs = do
!(!fid_cnt1,!cnccats) = genCncCats gr am cm cdefs
cnccat_ranges = Map.fromList (map (\(cid,s,e,_) -> (cid,(s,e))) cnccats)
!(!fid_cnt2,!productions,!lindefs,!linrefs,!cncfuns)
- = genCncFuns gr am cm ex_seqs seqs cdefs fid_cnt1 cnccat_ranges
+ = genCncFuns gr am cm ex_seqs ciCmp seqs cdefs fid_cnt1 cnccat_ranges
printnames = genPrintNames cdefs
- startCat = mkCId (fromMaybe "S" (flag optStartCat aflags))
+ startCat = (fromMaybe "S" (flag optStartCat aflags))
(lindefs',linrefs',productions',cncfuns',sequences',cnccats') =
(if flag optOptimizePGF opts then optimizePGF startCat else id)
@@ -118,16 +125,13 @@ grammar2PGF opts gr am probs = do
(seqs,infos) <- addMissingPMCFGs cm seqs is
return (seqs, ((m,id), info) : infos)
-mkSetArray set = listArray (0,Set.size set-1) (Set.toList set)
-mkMapArray map = array (0,Map.size map-1) [(k,v) | (v,k) <- Map.toList map]
+i2i :: Ident -> String
+i2i = showIdent
-i2i :: Ident -> CId
-i2i = mkCId . showIdent
-
-mi2i :: ModuleName -> CId
+mi2i :: ModuleName -> String
mi2i (MN i) = i2i i
-mkType :: (?builder :: Builder s) => [Ident] -> A.Type -> B s PGF.Type
+mkType :: (?builder :: Builder s) => [Ident] -> A.Type -> B s PGF2.Type
mkType scope t =
case GM.typeForm t of
(hyps,(_,cat),args) -> let (scope',hyps') = mkContext scope hyps
@@ -164,7 +168,7 @@ mkPatt scope p =
in (scope',C.PImplArg p')
A.PTilde t -> ( scope,C.PTilde (mkExp scope t))
-}
-mkContext :: (?builder :: Builder s) => [Ident] -> A.Context -> ([Ident],[B s PGF.Hypo])
+mkContext :: (?builder :: Builder s) => [Ident] -> A.Context -> ([Ident],[B s PGF2.Hypo])
mkContext scope hyps = mapAccumL (\scope (bt,x,ty) -> let ty' = mkType scope ty
in if x == identW
then ( scope,hypo bt (i2i x) ty')
@@ -206,16 +210,17 @@ genCncFuns :: Grammar
-> ModuleName
-> ModuleName
-> Array SeqId [Symbol]
+ -> ([Symbol] -> [Symbol] -> Ordering)
-> Array SeqId [Symbol]
-> [(QIdent, Info)]
-> FId
- -> Map.Map CId (Int,Int)
+ -> Map.Map PGF2.Cat (Int,Int)
-> (FId,
[(FId, [Production])],
[(FId, [FunId])],
[(FId, [FunId])],
- [(CId,[SeqId])])
-genCncFuns gr am cm ex_seqs seqs cdefs fid_cnt cnccat_ranges =
+ [(PGF2.Fun,[SeqId])])
+genCncFuns gr am cm ex_seqs ciCmp seqs cdefs fid_cnt cnccat_ranges =
let (fid_cnt1,funs_cnt1,funs1,lindefs,linrefs) = mkCncCats cdefs fid_cnt 0 [] IntMap.empty IntMap.empty
(fid_cnt2,funs_cnt2,funs2,prods0) = mkCncFuns cdefs fid_cnt1 funs_cnt1 funs1 lindefs Map.empty IntMap.empty
prods = [(fid,Set.toList prodSet) | (fid,prodSet) <- IntMap.toList prods0]
@@ -304,7 +309,7 @@ genCncFuns gr am cm ex_seqs seqs cdefs fid_cnt cnccat_ranges =
newIndex mseqs i = binSearch (mseqs ! i) seqs (bounds seqs)
binSearch v arr (i,j)
- | i <= j = case compare v (arr ! k) of
+ | i <= j = case ciCmp v (arr ! k) of
LT -> binSearch v arr (i,k-1)
EQ -> k
GT -> binSearch v arr (k+1,j)
@@ -323,3 +328,121 @@ genPrintNames cdefs =
flatten (K s) = s
flatten (Alts x _) = flatten x
flatten (C x y) = flatten x +++ flatten y
+
+mkArray lst = listArray (0,length lst-1) lst
+mkMapArray map = array (0,Map.size map-1) [(v,k) | (k,v) <- Map.toList map]
+mkSetArray set = listArray (0,Set.size set-1) (Set.toList set)
+
+-- A variant of Data.List.sortBy that, in addition to sorting,
+-- also eliminates duplicate values (as determined by cmp)
+sortNubBy cmp = mergeAll . sequences
+ where
+ sequences (a:b:xs) =
+ case cmp a b of
+ GT -> descending b [a] xs
+ EQ -> sequences (b:xs)
+ LT -> ascending b (a:) xs
+ sequences xs = [xs]
+
+ descending a as [] = [a:as]
+ descending a as (b:bs) =
+ case cmp a b of
+ GT -> descending b (a:as) bs
+ EQ -> descending a as bs
+ LT -> (a:as) : sequences (b:bs)
+
+ ascending a as [] = let !x = as [a]
+ in [x]
+ ascending a as (b:bs) =
+ case cmp a b of
+ GT -> let !x = as [a]
+ in x : sequences (b:bs)
+ EQ -> ascending a as bs
+ LT -> ascending b (\ys -> as (a:ys)) bs
+
+ mergeAll [x] = x
+ mergeAll xs = mergeAll (mergePairs xs)
+
+ mergePairs (a:b:xs) = let !x = merge a b
+ in x : mergePairs xs
+ mergePairs xs = xs
+
+ merge as@(a:as') bs@(b:bs') =
+ case cmp a b of
+ GT -> b:merge as bs'
+ EQ -> a:merge as' bs'
+ LT -> a:merge as' bs
+ merge [] bs = bs
+ merge as [] = as
+
+-- Case-insensitive comparison of symbol sequences. This allows
+-- case-insensitive parsing, while the linearizer still has access
+-- to the tokens in their original case.
+
+compareCaseInsensitive [] [] = EQ
+compareCaseInsensitive [] _ = LT
+compareCaseInsensitive _ [] = GT
+compareCaseInsensitive (x:xs) (y:ys) =
+ case compareSym x y of
+ EQ -> compareCaseInsensitive xs ys
+ x -> x
+ where
+ compareSym s1 s2 =
+ case s1 of
+ SymCat d1 r1
+ -> case s2 of
+ SymCat d2 r2
+ -> case compare d1 d2 of
+ EQ -> r1 `compare` r2
+ x -> x
+ _ -> LT
+ SymLit d1 r1
+ -> case s2 of
+ SymCat {} -> GT
+ SymLit d2 r2
+ -> case compare d1 d2 of
+ EQ -> r1 `compare` r2
+ x -> x
+ _ -> LT
+ SymVar d1 r1
+ -> if tagToEnum# (getTag s2 ># 2#)
+ then LT
+ else case s2 of
+ SymVar d2 r2
+ -> case compare d1 d2 of
+ EQ -> r1 `compare` r2
+ x -> x
+ _ -> GT
+ SymKS t1
+ -> if tagToEnum# (getTag s2 ># 3#)
+ then LT
+ else case s2 of
+ SymKS t2 -> t1 `compareToken` t2
+ _ -> GT
+ SymKP a1 b1
+ -> if tagToEnum# (getTag s2 ># 4#)
+ then LT
+ else case s2 of
+ SymKP a2 b2
+ -> case compare a1 a2 of
+ EQ -> b1 `compare` b2
+ x -> x
+ _ -> GT
+ _ -> let t1 = getTag s1
+ t2 = getTag s2
+ in if tagToEnum# (t1 <# t2)
+ then LT
+ else if tagToEnum# (t1 ==# t2)
+ then EQ
+ else GT
+
+ compareToken [] [] = EQ
+ compareToken [] _ = LT
+ compareToken _ [] = GT
+ compareToken (x:xs) (y:ys)
+ | x == y = compareToken xs ys
+ | otherwise = case compare (toLower x) (toLower y) of
+ EQ -> case compareToken xs ys of
+ EQ -> compare x y
+ x -> x
+ x -> x
diff --git a/src/compiler/GF/Compile/PGFtoJS.hs b/src/compiler/GF/Compile/PGFtoJS.hs
deleted file mode 100644
index 5b2aa3bf0..000000000
--- a/src/compiler/GF/Compile/PGFtoJS.hs
+++ /dev/null
@@ -1,88 +0,0 @@
-module GF.Compile.PGFtoJS (pgf2js) where
-
-import PGF
-import PGF.Internal
-import qualified GF.JavaScript.AbsJS as JS
-import qualified GF.JavaScript.PrintJS as JS
-import Data.Map (Map)
-import qualified Data.Set as Set
-import qualified Data.Map as Map
-import qualified Data.IntMap as IntMap
-
-pgf2js :: PGF -> String
-pgf2js pgf =
- JS.printTree $ JS.Program [JS.ElStmt $ JS.SDeclOrExpr $ JS.Decl [JS.DInit (JS.Ident n) grammar]]
- where
- n = showCId $ abstractName pgf
- start = showType [] $ startCat pgf
- grammar = new "GFGrammar" [js_abstract, js_concrete]
- js_abstract = abstract2js start pgf
- js_concrete = JS.EObj $ map (concrete2js pgf) (languages pgf)
-
-abstract2js :: String -> PGF -> JS.Expr
-abstract2js start pgf = new "GFAbstract" [JS.EStr start, JS.EObj [absdef2js f ty | f <- functions pgf, Just ty <- [functionType pgf f]]]
-
-absdef2js :: CId -> Type -> JS.Property
-absdef2js f typ =
- let (hypos,cat,_) = unType typ
- args = [cat | (_,_,typ) <- hypos, let (hypos,cat,_) = unType typ]
- in JS.Prop (JS.IdentPropName (JS.Ident (showCId f))) (new "Type" [JS.EArray [JS.EStr (showCId x) | x <- args], JS.EStr (showCId cat)])
-
-lit2js (LStr s) = JS.EStr s
-lit2js (LInt n) = JS.EInt n
-lit2js (LFlt d) = JS.EDbl d
-
-concrete2js :: PGF -> Language -> JS.Property
-concrete2js pgf lang =
- JS.Prop l (new "GFConcrete" [mapToJSObj (lit2js) $ concrFlags cnc,
- JS.EObj [JS.Prop (JS.IntPropName cat) (JS.EArray (map frule2js (concrProductions cnc cat))) | cat <- [0..concrTotalCats cnc]],
- JS.EArray [ffun2js (concrFunction cnc funid) | funid <- [0..concrTotalFuns cnc]],
- JS.EArray [seq2js (concrSequence cnc seqid) | seqid <- [0..concrTotalSeqs cnc]],
- JS.EObj $ map cats (concrCategories cnc),
- JS.EInt (concrTotalCats cnc)])
- where
- cnc = lookConcr pgf lang
- l = JS.IdentPropName (JS.Ident (showCId lang))
-
- litslins = [JS.Prop (JS.StringPropName "Int") (JS.EFun [children] [JS.SReturn $ new "Arr" [JS.EIndex (JS.EVar children) (JS.EInt 0)]]),
- JS.Prop (JS.StringPropName "Float") (JS.EFun [children] [JS.SReturn $ new "Arr" [JS.EIndex (JS.EVar children) (JS.EInt 0)]]),
- JS.Prop (JS.StringPropName "String") (JS.EFun [children] [JS.SReturn $ new "Arr" [JS.EIndex (JS.EVar children) (JS.EInt 0)]])]
-
- cats (c,start,end,_) = JS.Prop (JS.IdentPropName (JS.Ident (showCId c))) (JS.EObj [JS.Prop (JS.IdentPropName (JS.Ident "s")) (JS.EInt start)
- ,JS.Prop (JS.IdentPropName (JS.Ident "e")) (JS.EInt end)])
-
-children :: JS.Ident
-children = JS.Ident "cs"
-
-frule2js :: Production -> JS.Expr
-frule2js (PApply funid args) = new "Apply" [JS.EInt funid, JS.EArray (map farg2js args)]
-frule2js (PCoerce arg) = new "Coerce" [JS.EInt arg]
-
-farg2js (PArg hypos fid) = new "PArg" (map (JS.EInt . snd) hypos ++ [JS.EInt fid])
-
-ffun2js (f,lins) = new "CncFun" [JS.EStr (showCId f), JS.EArray (map JS.EInt lins)]
-
-seq2js :: [Symbol] -> JS.Expr
-seq2js seq = JS.EArray [sym2js s | s <- seq]
-
-sym2js :: Symbol -> JS.Expr
-sym2js (SymCat n l) = new "SymCat" [JS.EInt n, JS.EInt l]
-sym2js (SymLit n l) = new "SymLit" [JS.EInt n, JS.EInt l]
-sym2js (SymVar n l) = new "SymVar" [JS.EInt n, JS.EInt l]
-sym2js (SymKS t) = new "SymKS" [JS.EStr t]
-sym2js (SymKP ts alts) = new "SymKP" [JS.EArray (map sym2js ts), JS.EArray (map alt2js alts)]
-sym2js SymBIND = new "SymKS" [JS.EStr "&+"]
-sym2js SymSOFT_BIND = new "SymKS" [JS.EStr "&+"]
-sym2js SymSOFT_SPACE = new "SymKS" [JS.EStr "&+"]
-sym2js SymCAPIT = new "SymKS" [JS.EStr "&|"]
-sym2js SymALL_CAPIT = new "SymKS" [JS.EStr "&|"]
-sym2js SymNE = new "SymNE" []
-
-alt2js (ps,ts) = new "Alt" [JS.EArray (map sym2js ps), JS.EArray (map JS.EStr ts)]
-
-new :: String -> [JS.Expr] -> JS.Expr
-new f xs = JS.ENew (JS.Ident f) xs
-
-mapToJSObj :: (a -> JS.Expr) -> Map CId a -> JS.Expr
-mapToJSObj f m = JS.EObj [ JS.Prop (JS.IdentPropName (JS.Ident (showCId k))) (f v) | (k,v) <- Map.toList m ]
-
diff --git a/src/compiler/GF/Compile/PGFtoJSON.hs b/src/compiler/GF/Compile/PGFtoJSON.hs
new file mode 100644
index 000000000..e634dae67
--- /dev/null
+++ b/src/compiler/GF/Compile/PGFtoJSON.hs
@@ -0,0 +1,156 @@
+module GF.Compile.PGFtoJSON (pgf2json) where
+
+import PGF (showCId)
+import qualified PGF.Internal as M
+import PGF.Internal (
+ Abstr,
+ CId,
+ CncCat(..),
+ CncFun(..),
+ Concr,
+ DotPos,
+ Equation(..),
+ Literal(..),
+ PArg(..),
+ PGF,
+ Production(..),
+ Symbol(..),
+ Type,
+ absname,
+ abstract,
+ cflags,
+ cnccats,
+ cncfuns,
+ concretes,
+ funs,
+ productions,
+ sequences,
+ totalCats
+ )
+
+import qualified Text.JSON as JSON
+import Text.JSON (JSValue(..))
+
+import qualified Data.Array.IArray as Array
+import Data.Map (Map)
+import qualified Data.Set as Set
+import qualified Data.Map as Map
+import qualified Data.IntMap as IntMap
+
+pgf2json :: PGF -> String
+pgf2json pgf =
+ JSON.encode $ JSON.makeObj
+ [ ("abstract", json_abstract)
+ , ("concretes", json_concretes)
+ ]
+ where
+ n = showCId $ absname pgf
+ as = abstract pgf
+ cs = Map.assocs (concretes pgf)
+ start = showCId $ M.lookStartCat pgf
+ json_abstract = abstract2json n start as
+ json_concretes = JSON.makeObj $ map concrete2json cs
+
+abstract2json :: String -> String -> Abstr -> JSValue
+abstract2json name start ds =
+ JSON.makeObj
+ [ ("name", mkJSStr name)
+ , ("startcat", mkJSStr start)
+ , ("funs", JSON.makeObj $ map absdef2json (Map.assocs (funs ds)))
+ ]
+
+absdef2json :: (CId,(Type,Int,Maybe ([Equation],[[M.Instr]]),Double)) -> (String,JSValue)
+absdef2json (f,(typ,_,_,_)) = (showCId f,sig)
+ where
+ (args,cat) = M.catSkeleton typ
+ sig = JSON.makeObj
+ [ ("args", JSArray $ map (mkJSStr.showCId) args)
+ , ("cat", mkJSStr $ showCId cat)
+ ]
+
+lit2json :: Literal -> JSValue
+lit2json (LStr s) = mkJSStr s
+lit2json (LInt n) = mkJSInt n
+lit2json (LFlt d) = JSRational True (toRational d)
+
+concrete2json :: (CId,Concr) -> (String,JSValue)
+concrete2json (c,cnc) = (showCId c,obj)
+ where
+ obj = JSON.makeObj
+ [ ("flags", JSON.makeObj [ (showCId k, lit2json v) | (k,v) <- Map.toList (cflags cnc) ])
+ , ("productions", JSON.makeObj [ (show cat, JSArray (map frule2json (Set.toList set))) | (cat,set) <- IntMap.toList (productions cnc)])
+ , ("functions", JSArray (map ffun2json (Array.elems (cncfuns cnc))))
+ , ("sequences", JSArray (map seq2json (Array.elems (sequences cnc))))
+ , ("categories", JSON.makeObj $ map cats2json (Map.assocs (cnccats cnc)))
+ , ("totalfids", mkJSInt (totalCats cnc))
+ ]
+
+cats2json :: (CId, CncCat) -> (String,JSValue)
+cats2json (c,CncCat start end _) = (showCId c, ixs)
+ where
+ ixs = JSON.makeObj
+ [ ("start", mkJSInt start)
+ , ("end", mkJSInt end)
+ ]
+
+frule2json :: Production -> JSValue
+frule2json (PApply fid args) =
+ JSON.makeObj
+ [ ("type", mkJSStr "Apply")
+ , ("fid", mkJSInt fid)
+ , ("args", JSArray (map farg2json args))
+ ]
+frule2json (PCoerce arg) =
+ JSON.makeObj
+ [ ("type", mkJSStr "Coerce")
+ , ("arg", mkJSInt arg)
+ ]
+
+farg2json :: PArg -> JSValue
+farg2json (PArg hypos fid) =
+ JSON.makeObj
+ [ ("type", mkJSStr "PArg")
+ , ("hypos", JSArray $ map (mkJSInt . snd) hypos)
+ , ("fid", mkJSInt fid)
+ ]
+
+ffun2json :: CncFun -> JSValue
+ffun2json (CncFun f lins) =
+ JSON.makeObj
+ [ ("name", mkJSStr $ showCId f)
+ , ("lins", JSArray (map mkJSInt (Array.elems lins)))
+ ]
+
+seq2json :: Array.Array DotPos Symbol -> JSValue
+seq2json seq = JSArray [sym2json s | s <- Array.elems seq]
+
+sym2json :: Symbol -> JSValue
+sym2json (SymCat n l) = new "SymCat" [mkJSInt n, mkJSInt l]
+sym2json (SymLit n l) = new "SymLit" [mkJSInt n, mkJSInt l]
+sym2json (SymVar n l) = new "SymVar" [mkJSInt n, mkJSInt l]
+sym2json (SymKS t) = new "SymKS" [mkJSStr t]
+sym2json (SymKP ts alts) = new "SymKP" [JSArray (map sym2json ts), JSArray (map alt2json alts)]
+sym2json SymBIND = new "SymKS" [mkJSStr "&+"]
+sym2json SymSOFT_BIND = new "SymKS" [mkJSStr "&+"]
+sym2json SymSOFT_SPACE = new "SymKS" [mkJSStr "&+"]
+sym2json SymCAPIT = new "SymKS" [mkJSStr "&|"]
+sym2json SymALL_CAPIT = new "SymKS" [mkJSStr "&|"]
+sym2json SymNE = new "SymNE" []
+
+alt2json :: ([Symbol],[String]) -> JSValue
+alt2json (ps,ts) = new "Alt" [JSArray (map sym2json ps), JSArray (map mkJSStr ts)]
+
+new :: String -> [JSValue] -> JSValue
+new f xs =
+ JSON.makeObj
+ [ ("type", mkJSStr f)
+ , ("args", JSArray xs)
+ ]
+
+-- | Make JSON value from string
+mkJSStr :: String -> JSValue
+mkJSStr = JSString . JSON.toJSString
+
+-- | Make JSON value from integer
+mkJSInt :: Integral a => a -> JSValue
+mkJSInt = JSRational False . toRational
diff --git a/src/compiler/GF/Compile/PGFtoProlog.hs b/src/compiler/GF/Compile/PGFtoProlog.hs
deleted file mode 100644
index 72fadfa79..000000000
--- a/src/compiler/GF/Compile/PGFtoProlog.hs
+++ /dev/null
@@ -1,186 +0,0 @@
-----------------------------------------------------------------------
--- |
--- Module : PGFtoProlog
--- Maintainer : Peter Ljunglöf
---
--- exports a GF grammar into a Prolog module
------------------------------------------------------------------------------
-
-module GF.Compile.PGFtoProlog (grammar2prolog) where
-
-import PGF
-import PGF.Internal
-
-import GF.Data.Operations
-
-import qualified Data.Array.IArray as Array
-import qualified Data.Set as Set
-import qualified Data.Map as Map
-import qualified Data.IntMap as IntMap
-import Data.Char (isAlphaNum, isAscii, isAsciiLower, isAsciiUpper, ord)
-import Data.List (isPrefixOf, mapAccumL)
-
-grammar2prolog :: PGF -> String
-grammar2prolog pgf
- = ("%% This file was automatically generated by GF" +++++
- ":- style_check(-singleton)." +++++
- plFacts wildCId "abstract" 1 "(?AbstractName)"
- [[plp name]] ++++
- plFacts wildCId "concrete" 2 "(?AbstractName, ?ConcreteName)"
- [[plp name, plp cncname] |
- cncname <- languages pgf] ++++
- plFacts wildCId "flag" 2 "(?Flag, ?Value): global flags"
- [[plp f, plp v] |
- (f, v) <- Map.assocs (globalFlags pgf)] ++++
- plAbstract name pgf ++++
- unlines [plConcrete name (lookConcr pgf name) | name <- languages pgf]
- )
- where name = abstractName pgf
-
-----------------------------------------------------------------------
--- abstract syntax
-
-plAbstract :: CId -> PGF -> String
-plAbstract name pgf
- = (plHeader "Abstract syntax" ++++
- plFacts name "flag" 2 "(?Flag, ?Value): flags for abstract syntax"
- [[plp f, plp v] |
- (f, v) <- Map.assocs (abstrFlags pgf)] ++++
- plFacts name "cat" 2 "(?Type, ?[X:Type,...])"
- [[plType cat, []] | cat <- categories pgf] ++++
- plFacts name "fun" 3 "(?Fun, ?Type, ?[X:Type,...])"
- [[plp fun, plType cat, plHypos hypos] |
- fun <- functions pgf, Just typ <- [functionType pgf fun],
- let (hypos,cat,_) = unType typ]
- )
- where plType cat = plTerm (plp cat) []
- plHypos hypos = plList [plOper ":" (plp x) (plp ty) | (_, x, ty) <- hypos]
-
-----------------------------------------------------------------------
--- concrete syntax
-
-plConcrete :: CId -> Concr -> String
-plConcrete name cnc
- = (plHeader ("Concrete syntax: " ++ plp name) ++++
- plFacts name "flag" 2 "(?Flag, ?Value): flags for concrete syntax"
- [[plp f, plp v] |
- (f, v) <- Map.assocs (concrFlags cnc)] ++++
- plFacts name "prod" 3 "(?CncCat, ?CncFun, ?[CncCat])"
- [[plCat cat, fun, plTerm "c" (map plCat args)] |
- cat <- [0..concrTotalCats cnc-1],
- (fun, args) <- map plProduction (concrProductions cnc cat)] ++++
- plFacts name "cncfun" 3 "(?CncFun, ?[Seq,...], ?AbsFun)"
- [[plFun funid, plTerm "s" (map plSeq lins), plp absfun] |
- funid <- [0..concrTotalFuns cnc-1], let (absfun,lins) = concrFunction cnc funid] ++++
- plFacts name "seq" 2 "(?Seq, ?[Term])"
- [[plSeq seqid, plp (concrSequence cnc seqid)] |
- seqid <- [0..concrTotalSeqs cnc-1]] ++++
- plFacts name "cnccat" 2 "(?AbsCat, ?[CnCCat])"
- [[plp cat, plList (map plCat [start..end])] |
- (cat,start,end,_) <- concrCategories cnc]
- )
- where plProduction (PCoerce arg) = ("-", [arg])
- plProduction (PApply funid args) = (plFun funid, [fid | PArg hypos fid <- args])
-
-----------------------------------------------------------------------
--- prolog-printing pgf datatypes
-
-instance PLPrint Type where
- plp ty
- | null hypos = result
- | otherwise = plOper " -> " plHypos result
- where (hypos,cat,_) = unType ty
- result = plTerm (plp cat) []
- plHypos = plList [plOper ":" (plp x) (plp ty) | (_,x,ty) <- hypos]
-
-instance PLPrint CId where
- plp cid | isLogicalVariable str || cid == wildCId = plVar str
- | otherwise = plAtom str
- where str = showCId cid
-
-instance PLPrint Literal where
- plp (LStr s) = plp s
- plp (LInt n) = plp (show n)
- plp (LFlt f) = plp (show f)
-
-instance PLPrint Symbol where
- plp (SymCat n l) = plOper ":" (show n) (show l)
- plp (SymLit n l) = plTerm "lit" [show n, show l]
- plp (SymVar n l) = plTerm "var" [show n, show l]
- plp (SymKS t) = plAtom t
- plp (SymKP ts alts) = plTerm "pre" [plList (map plp ts), plList (map plAlt alts)]
- where plAlt (ps,ts) = plOper "/" (plList (map plp ps)) (plList (map plAtom ts))
-
-class PLPrint a where
- plp :: a -> String
- plps :: [a] -> String
- plps = plList . map plp
-
-instance PLPrint Char where
- plp c = plAtom [c]
- plps s = plAtom s
-
-instance PLPrint a => PLPrint [a] where
- plp = plps
-
-----------------------------------------------------------------------
--- other prolog-printing functions
-
-plCat :: Int -> String
-plCat n = plAtom ('c' : show n)
-
-plFun :: Int -> String
-plFun n = plAtom ('f' : show n)
-
-plSeq :: Int -> String
-plSeq n = plAtom ('s' : show n)
-
-plHeader :: String -> String
-plHeader hdr = "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n%% " ++ hdr ++ "\n"
-
-plFacts :: CId -> String -> Int -> String -> [[String]] -> String
-plFacts mod pred arity comment facts = "%% " ++ pred ++ comment ++++ clauses
- where clauses = (if facts == [] then ":- dynamic " ++ pred ++ "/" ++ show arity ++ ".\n"
- else unlines [mod' ++ plTerm pred args ++ "." | args <- facts])
- mod' = if mod == wildCId then "" else plp mod ++ ": "
-
-plTerm :: String -> [String] -> String
-plTerm fun args = plAtom fun ++ prParenth (prTList ", " args)
-
-plList :: [String] -> String
-plList xs = prBracket (prTList "," xs)
-
-plOper :: String -> String -> String -> String
-plOper op a b = prParenth (a ++ op ++ b)
-
-plVar :: String -> String
-plVar = varPrefix . concatMap changeNonAlphaNum
- where varPrefix var@(c:_) | isAsciiUpper c || c=='_' = var
- | otherwise = "_" ++ var
- changeNonAlphaNum c | isAlphaNumUnderscore c = [c]
- | otherwise = "_" ++ show (ord c) ++ "_"
-
-plAtom :: String -> String
-plAtom "" = "''"
-plAtom atom@(c:cs) | isAsciiLower c && all isAlphaNumUnderscore cs
- || c == '\'' && cs /= "" && last cs == '\'' = atom
- | otherwise = "'" ++ changeQuote atom ++ "'"
- where changeQuote ('\'':cs) = '\\' : '\'' : changeQuote cs
- changeQuote ('\\':cs) = '\\' : '\\' : changeQuote cs
- changeQuote (c:cs) = c : changeQuote cs
- changeQuote "" = ""
-
-isAlphaNumUnderscore :: Char -> Bool
-isAlphaNumUnderscore c = (isAscii c && isAlphaNum c) || c == '_'
-
-----------------------------------------------------------------------
--- prolog variables
-
-createLogicalVariable :: Int -> CId
-createLogicalVariable n = mkCId (logicalVariablePrefix ++ show n)
-
-isLogicalVariable :: String -> Bool
-isLogicalVariable = isPrefixOf logicalVariablePrefix
-
-logicalVariablePrefix :: String
-logicalVariablePrefix = "X"
diff --git a/src/compiler/GF/Compile/PGFtoPython.hs b/src/compiler/GF/Compile/PGFtoPython.hs
deleted file mode 100644
index eeed374cf..000000000
--- a/src/compiler/GF/Compile/PGFtoPython.hs
+++ /dev/null
@@ -1,114 +0,0 @@
-----------------------------------------------------------------------
--- |
--- Module : PGFtoPython
--- Maintainer : Peter Ljunglöf
---
--- exports a GF grammar into a Python module
------------------------------------------------------------------------------
-
-{-# LANGUAGE FlexibleContexts #-}
-module GF.Compile.PGFtoPython (pgf2python) where
-
-import PGF
-import PGF.Internal
-import qualified Data.Map as Map
-import GF.Data.Operations
-
-pgf2python :: PGF -> String
-pgf2python pgf = ("# -*- coding: utf-8 -*-" ++++
- "# This file was automatically generated by GF" +++++
- showCId name +++ "=" +++
- pyDict 1 pyStr id [
- ("flags", pyDict 2 pyCId pyLiteral (Map.assocs (globalFlags pgf))),
- ("abstract", pyDict 2 pyStr id [
- ("name", pyCId name),
- ("start", pyCId start),
- ("flags", pyDict 3 pyCId pyLiteral (Map.assocs (abstrFlags pgf))),
- ("funs", pyDict 3 pyCId pyAbsdef [(f,ty) | f <- functions pgf, Just ty <- [functionType pgf f]])
- ]),
- ("concretes", pyDict 2 pyCId pyConcrete [(lang,lookConcr pgf lang) | lang <- languages pgf])
- ] ++ "\n")
- where
- name = abstractName pgf
- (_,start,_) = unType (startCat pgf)
--- cncs = concretes pgf
-
-pyAbsdef :: Type -> String
-pyAbsdef typ = pyTuple 0 id [pyCId cat, pyList 0 pyCId args]
- where (hypos,cat,_) = unType typ
- args = [cat | (_,_,typ) <- hypos, let (_,cat,_) = unType typ]
-
-pyLiteral :: Literal -> String
-pyLiteral (LStr s) = pyStr s
-pyLiteral (LInt n) = show n
-pyLiteral (LFlt d) = show d
-
-pyConcrete :: Concr -> String
-pyConcrete cnc = pyDict 3 pyStr id [
- ("flags", pyDict 0 pyCId pyLiteral (Map.assocs (concrFlags cnc))),
- ("productions", pyDict 4 pyCat pyProds [(fid,concrProductions cnc fid) | fid <- [0..concrTotalCats cnc-1]]),
- ("cncfuns", pyDict 4 pyFun pyCncFun [(funid,concrFunction cnc funid) | funid <- [0..concrTotalFuns cnc-1]]),
- ("sequences", pyDict 4 pySeq pySymbols [(seqid,concrSequence cnc seqid) | seqid <- [0..concrTotalSeqs cnc-1]]),
- ("cnccats", pyDict 4 pyCId pyCncCat [(cat,(s,e,lbls)) | (cat,s,e,lbls) <- concrCategories cnc]),
- ("size", show (concrTotalCats cnc))
- ]
- where pyProds prods = pyList 5 pyProduction prods
- pyCncCat (start,end,_) = pyList 0 pyCat [start..end]
- pyCncFun (f,lins) = pyTuple 0 id [pyList 0 pySeq lins, pyCId f]
- pySymbols syms = pyList 0 pySymbol syms
-
-pyProduction :: Production -> String
-pyProduction (PCoerce arg) = pyTuple 0 id [pyStr "", pyList 0 pyCat [arg]]
-pyProduction (PApply funid args) = pyTuple 0 id [pyFun funid, pyList 0 pyPArg args]
- where pyPArg (PArg [] fid) = pyCat fid
- pyPArg (PArg hypos fid) = pyTuple 0 pyCat (fid : map snd hypos)
-
-pySymbol :: Symbol -> String
-pySymbol (SymCat n l) = pyTuple 0 show [n, l]
-pySymbol (SymLit n l) = pyDict 0 pyStr id [("lit", pyTuple 0 show [n, l])]
-pySymbol (SymVar n l) = pyDict 0 pyStr id [("var", pyTuple 0 show [n, l])]
-pySymbol (SymKS t) = pyStr t
-pySymbol (SymKP ts alts) = pyDict 0 pyStr id [("pre", pyList 0 pySymbol ts), ("alts", pyList 0 alt2py alts)]
- where alt2py (ps,ts) = pyTuple 0 (pyList 0 pyStr) [map pySymbol ps, ts]
-pySymbol SymBIND = pyStr "&+"
-pySymbol SymSOFT_BIND = pyStr "&+"
-pySymbol SymSOFT_SPACE = pyStr "&+"
-pySymbol SymCAPIT = pyStr "&|"
-pySymbol SymALL_CAPIT = pyStr "&|"
-pySymbol SymNE = pyDict 0 pyStr id [("nonExist", pyTuple 0 id [])]
-
-----------------------------------------------------------------------
--- python helpers
-
-pyDict :: Int -> (k -> String) -> (v -> String) -> [(k, v)] -> String
-pyDict n pk pv [] = "{}"
-pyDict n pk pv kvlist = prCurly (pyIndent n ++ prTList ("," ++ pyIndent n) (map pyKV kvlist) ++ pyIndent n)
- where pyKV (k, v) = pk k ++ ":" ++ pv v
-
-pyList :: Int -> (v -> String) -> [v] -> String
-pyList n pv [] = "[]"
-pyList n pv xs = prBracket (pyIndent n ++ prTList ("," ++ pyIndent n) (map pv xs) ++ pyIndent n)
-
-pyTuple :: Int -> (v -> String) -> [v] -> String
-pyTuple n pv [] = "()"
-pyTuple n pv [x] = prParenth (pyIndent n ++ pv x ++ "," ++ pyIndent n)
-pyTuple n pv xs = prParenth (pyIndent n ++ prTList ("," ++ pyIndent n) (map pv xs) ++ pyIndent n)
-
-pyCat :: Int -> String
-pyCat n = pyStr ('C' : show n)
-
-pyFun :: Int -> String
-pyFun n = pyStr ('F' : show n)
-
-pySeq :: Int -> String
-pySeq n = pyStr ('S' : show n)
-
-pyStr :: String -> String
-pyStr s = 'u' : prQuotedString s
-
-pyCId :: CId -> String
-pyCId = pyStr . showCId
-
-pyIndent :: Int -> String
-pyIndent n | n > 0 = "\n" ++ replicate n ' '
- | otherwise = ""
diff --git a/src/compiler/GF/Compile/TypeCheck/RConcrete.hs b/src/compiler/GF/Compile/TypeCheck/RConcrete.hs
index 2fe08b256..f4d439eb0 100644
--- a/src/compiler/GF/Compile/TypeCheck/RConcrete.hs
+++ b/src/compiler/GF/Compile/TypeCheck/RConcrete.hs
@@ -359,12 +359,13 @@ getOverload gr g mt ot = case appForm ot of
nest 2 (showTypes pre)
return (mkApp fun tts, val)
([],[]) -> do
- checkError $ "no overload instance of" <+> ppTerm Unqualified 0 f $$
- "for" $$
+ checkError $ "no overload instance of" <+> ppTerm Qualified 0 f $$
+ maybe empty (\x -> "with value type" <+> ppType x) mt $$
+ "for argument list" $$
nest 2 stysError $$
- "among" $$
- nest 2 (vcat stypsError) $$
- maybe empty (\x -> "with value type" <+> ppType x) mt
+ "among alternatives" $$
+ nest 2 (vcat stypsError)
+
(vfs1,vfs2) -> case (noProds vfs1,noProds vfs2) of
([(val,fun)],_) -> do
diff --git a/src/compiler/GF/Compile/pgf.schema.json b/src/compiler/GF/Compile/pgf.schema.json
new file mode 100644
index 000000000..2058e9a70
--- /dev/null
+++ b/src/compiler/GF/Compile/pgf.schema.json
@@ -0,0 +1,232 @@
+{
+ "$schema": "http://json-schema.org/draft-07/schema#",
+ "$id": "http://grammaticalframework.org/pgf.schema.json",
+ "type": "object",
+ "title": "PGF JSON Schema",
+ "required": [
+ "abstract",
+ "concretes"
+ ],
+ "properties": {
+ "abstract": {
+ "type": "object",
+ "required": [
+ "name",
+ "startcat",
+ "funs"
+ ],
+ "properties": {
+ "name": {
+ "type": "string"
+ },
+ "startcat": {
+ "type": "string"
+ },
+ "funs": {
+ "type": "object",
+ "additionalProperties": {
+ "type": "object",
+ "required": [
+ "args",
+ "cat"
+ ],
+ "properties": {
+ "args": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
+ "cat": {
+ "type": "string"
+ }
+ }
+ }
+ }
+ }
+ },
+ "concretes": {
+ "type": "object",
+ "additionalProperties": {
+ "required": [
+ "flags",
+ "productions",
+ "functions",
+ "sequences",
+ "categories",
+ "totalfids"
+ ],
+ "properties": {
+ "flags": {
+ "type": "object",
+ "additionalProperties": {
+ "type": ["string", "number"]
+ }
+ },
+ "productions": {
+ "type": "object",
+ "additionalProperties": {
+ "type": "array",
+ "items": {
+ "oneOf": [
+ {
+ "$ref": "#/definitions/apply"
+ },
+ {
+ "$ref": "#/definitions/coerce"
+ }
+ ]
+ }
+ }
+ },
+ "functions": {
+ "type": "array",
+ "items": {
+ "title": "CncFun",
+ "type": "object",
+ "properties": {
+ "name": {
+ "type": "string"
+ },
+ "lins": {
+ "type": "array",
+ "items": {
+ "type": "integer"
+ }
+ }
+ }
+ }
+ },
+ "sequences": {
+ "type": "array",
+ "items": {
+ "type": "array",
+ "items": {
+ "$ref": "#/definitions/sym"
+ }
+ }
+ },
+ "categories": {
+ "type": "object",
+ "additionalProperties": {
+ "title": "CncCat",
+ "type": "object",
+ "required": [
+ "start",
+ "end"
+ ],
+ "properties": {
+ "start": {
+ "type": "integer"
+ },
+ "end": {
+ "type": "integer"
+ }
+ }
+ }
+ },
+ "totalfids": {
+ "type": "integer"
+ }
+ }
+ }
+ }
+ },
+ "definitions": {
+ "apply": {
+ "required": [
+ "type",
+ "fid",
+ "args"
+ ],
+ "properties": {
+ "type": {
+ "type": "string",
+ "enum": ["Apply"]
+ },
+ "fid": {
+ "type": "integer"
+ },
+ "args": {
+ "type": "array",
+ "items": {
+ "$ref": "#/definitions/parg"
+ }
+ }
+ }
+ },
+ "coerce": {
+ "required": [
+ "type",
+ "arg"
+ ],
+ "properties": {
+ "type": {
+ "type": "string",
+ "enum": ["Coerce"]
+ },
+ "arg": {
+ "type": "integer"
+ }
+ }
+ },
+ "parg": {
+ "required": [
+ "type",
+ "hypos",
+ "fid"
+ ],
+ "properties": {
+ "type": {
+ "type": "string",
+ "enum": ["PArg"]
+ },
+ "hypos": {
+ "type": "array",
+ "items": {
+ "type": "integer"
+ }
+ },
+ "fid": {
+ "type": "integer"
+ }
+ }
+ },
+ "sym": {
+ "title": "Sym",
+ "required": [
+ "type",
+ "args"
+ ],
+ "properties": {
+ "type": {
+ "type": "string",
+ "enum": [
+ "SymCat",
+ "SymLit",
+ "SymVar",
+ "SymKS",
+ "SymKP",
+ "SymNE"
+ ]
+ },
+ "args": {
+ "type": "array",
+ "items": {
+ "anyOf": [
+ {
+ "type": "string"
+ },
+ {
+ "type": "integer"
+ },
+ {
+ "$ref": "#/definitions/sym"
+ }
+ ]
+ }
+ }
+ }
+ }
+ }
+}
diff --git a/src/compiler/GF/Compiler.hs b/src/compiler/GF/Compiler.hs
index f03230f76..8479fe28a 100644
--- a/src/compiler/GF/Compiler.hs
+++ b/src/compiler/GF/Compiler.hs
@@ -1,11 +1,12 @@
module GF.Compiler (mainGFC, linkGrammars, writeGrammar, writeOutputs) where
-import PGF
-import PGF.Internal(unionPGF,writePGF,writeConcr)
+import PGF2
+import PGF2.Internal(unionPGF,writePGF,writeConcr)
import GF.Compile as S(batchCompile,link,srcAbsName)
import GF.CompileInParallel as P(parallelBatchCompile)
import GF.Compile.Export
import GF.Compile.ConcreteToHaskell(concretes2haskell)
+import GF.Compile.GrammarToCanonical--(concretes2canonical)
import GF.Compile.CFGtoPGF
import GF.Compile.GetGrammar
import GF.Grammar.BNFC
@@ -16,12 +17,13 @@ import GF.Infra.UseIO
import GF.Infra.Option
import GF.Data.ErrM
import GF.System.Directory
-import GF.Text.Pretty(render)
+import GF.Text.Pretty(render,render80)
import Data.Maybe
import qualified Data.Map as Map
import qualified Data.Set as Set
import qualified Data.ByteString.Lazy as BSL
+import GF.Grammar.CanonicalJSON (encodeJSON)
import System.FilePath
import Control.Monad(when,unless,forM_)
@@ -46,7 +48,7 @@ mainGFC opts fs = do
compileSourceFiles :: Options -> [FilePath] -> IOE ()
compileSourceFiles opts fs =
do output <- batchCompile opts fs
- cncs2haskell output
+ exportCanonical output
unless (flag optStopAfterPhase opts == Compile) $
linkGrammars opts output
where
@@ -54,15 +56,35 @@ compileSourceFiles opts fs =
batchCompile' opts fs = do (t,cnc_gr) <- S.batchCompile opts fs
return (t,[cnc_gr])
- cncs2haskell output =
- when (FmtHaskell `elem` flag optOutputFormats opts &&
- haskellOption opts HaskellConcrete) $
- mapM_ cnc2haskell (snd output)
+ exportCanonical (_time, canonical) =
+ do when (FmtHaskell `elem` ofmts && haskellOption opts HaskellConcrete) $
+ mapM_ cnc2haskell canonical
+ when (FmtCanonicalGF `elem` ofmts) $
+ do createDirectoryIfMissing False "canonical"
+ mapM_ abs2canonical canonical
+ mapM_ cnc2canonical canonical
+ when (FmtCanonicalJson `elem` ofmts) $ mapM_ grammar2json canonical
+ where
+ ofmts = flag optOutputFormats opts
cnc2haskell (cnc,gr) =
- mapM_ writeHs $ concretes2haskell opts (srcAbsName gr cnc) gr
+ do mapM_ writeExport $ concretes2haskell opts (srcAbsName gr cnc) gr
- writeHs (path,s) = writing opts path $ writeUTF8File path s
+ abs2canonical (cnc,gr) =
+ writeExport ("canonical/"++render absname++".gf",render80 canAbs)
+ where
+ absname = srcAbsName gr cnc
+ canAbs = abstract2canonical absname gr
+
+ cnc2canonical (cnc,gr) =
+ mapM_ (writeExport.fmap render80) $
+ concretes2canonical opts (srcAbsName gr cnc) gr
+
+ grammar2json (cnc,gr) = encodeJSON (render absname ++ ".json") gr_canon
+ where absname = srcAbsName gr cnc
+ gr_canon = grammar2canonical opts absname gr
+
+ writeExport (path,s) = writing opts path $ writeUTF8File path s
-- | Create a @.pgf@ file (and possibly files in other formats, if specified
@@ -113,7 +135,7 @@ unionPGFFiles opts fs =
doIt =
do pgfs <- mapM readPGFVerbose fs
let pgf = foldl1 (\one two -> fromMaybe two (unionPGF one two)) pgfs
- pgfFile = outputPath opts (grammarName opts pgf <.> "pgf")
+ let pgfFile = outputPath opts (grammarName opts pgf <.> "pgf")
if pgfFile `elem` fs
then putStrLnE $ "Refusing to overwrite " ++ pgfFile
else writeGrammar opts pgf
@@ -135,7 +157,7 @@ writeOutputs opts pgf = do
-- A split PGF file is output if the @-split-pgf@ option is used.
writeGrammar :: Options -> PGF -> IOE ()
writeGrammar opts pgf =
- if flag optSplitPGF opts then writeSplitPGF else writeNormalPGF
+ if flag optSplitPGF opts then writeSplitPGF else writeNormalPGF
where
writeNormalPGF =
do let outfile = outputPath opts (grammarName opts pgf <.> "pgf")
@@ -144,9 +166,9 @@ writeGrammar opts pgf =
writeSplitPGF =
do let outfile = outputPath opts (grammarName opts pgf <.> "pgf")
writing opts outfile $ writePGF outfile pgf
- forM_ (languages pgf) $ \lang -> do
- let outfile = outputPath opts (showCId lang <.> "pgf_c")
- writing opts outfile (writeConcr outfile pgf lang)
+ forM_ (Map.toList (languages pgf)) $ \(concrname,concr) -> do
+ let outfile = outputPath opts (concrname <.> "pgf_c")
+ writing opts outfile (writeConcr outfile concr)
writeOutput :: Options -> FilePath-> String -> IOE ()
@@ -156,7 +178,7 @@ writeOutput opts file str = writing opts path $ writeUTF8File path str
-- * Useful helper functions
grammarName :: Options -> PGF -> String
-grammarName opts pgf = grammarName' opts (showCId (abstractName pgf))
+grammarName opts pgf = grammarName' opts (abstractName pgf)
grammarName' opts abs = fromMaybe abs (flag optName opts)
outputJustPGF opts = null (flag optOutputFormats opts) && not (flag optSplitPGF opts)
diff --git a/src/compiler/GF/Grammar/Canonical.hs b/src/compiler/GF/Grammar/Canonical.hs
new file mode 100644
index 000000000..4adff02f2
--- /dev/null
+++ b/src/compiler/GF/Grammar/Canonical.hs
@@ -0,0 +1,313 @@
+-- |
+-- Module : GF.Grammar.Canonical
+-- Stability : provisional
+--
+-- Abstract syntax for canonical GF grammars, i.e. what's left after
+-- high-level constructions such as functors and opers have been eliminated
+-- by partial evaluation. This is intended as a common intermediate
+-- representation to simplify export to other formats.
+
+{-# LANGUAGE DeriveTraversable #-}
+module GF.Grammar.Canonical where
+import Prelude hiding ((<>))
+import GF.Text.Pretty
+
+-- | A complete grammar
+data Grammar = Grammar Abstract [Concrete] deriving Show
+
+--------------------------------------------------------------------------------
+-- ** Abstract Syntax
+
+-- | Abstract Syntax
+data Abstract = Abstract ModId Flags [CatDef] [FunDef] deriving Show
+abstrName (Abstract mn _ _ _) = mn
+
+data CatDef = CatDef CatId [CatId] deriving Show
+data FunDef = FunDef FunId Type deriving Show
+data Type = Type [TypeBinding] TypeApp deriving Show
+data TypeApp = TypeApp CatId [Type] deriving Show
+
+data TypeBinding = TypeBinding VarId Type deriving Show
+
+--------------------------------------------------------------------------------
+-- ** Concrete syntax
+
+-- | Concrete Syntax
+data Concrete = Concrete ModId ModId Flags [ParamDef] [LincatDef] [LinDef]
+ deriving Show
+concName (Concrete cnc _ _ _ _ _) = cnc
+
+data ParamDef = ParamDef ParamId [ParamValueDef]
+ | ParamAliasDef ParamId LinType
+ deriving Show
+data LincatDef = LincatDef CatId LinType deriving Show
+data LinDef = LinDef FunId [VarId] LinValue deriving Show
+
+-- | Linearization type, RHS of @lincat@
+data LinType = FloatType
+ | IntType
+ | ParamType ParamType
+ | RecordType [RecordRowType]
+ | StrType
+ | TableType LinType LinType
+ | TupleType [LinType]
+ deriving (Eq,Ord,Show)
+
+newtype ParamType = ParamTypeId ParamId deriving (Eq,Ord,Show)
+
+-- | Linearization value, RHS of @lin@
+data LinValue = ConcatValue LinValue LinValue
+ | LiteralValue LinLiteral
+ | ErrorValue String
+ | ParamConstant ParamValue
+ | PredefValue PredefId
+ | RecordValue [RecordRowValue]
+ | TableValue LinType [TableRowValue]
+--- | VTableValue LinType [LinValue]
+ | TupleValue [LinValue]
+ | VariantValue [LinValue]
+ | VarValue VarValueId
+ | PreValue [([String], LinValue)] LinValue
+ | Projection LinValue LabelId
+ | Selection LinValue LinValue
+ | CommentedValue String LinValue
+ deriving (Eq,Ord,Show)
+
+data LinLiteral = FloatConstant Float
+ | IntConstant Int
+ | StrConstant String
+ deriving (Eq,Ord,Show)
+
+data LinPattern = ParamPattern ParamPattern
+ | RecordPattern [RecordRow LinPattern]
+ | TuplePattern [LinPattern]
+ | WildPattern
+ deriving (Eq,Ord,Show)
+
+type ParamValue = Param LinValue
+type ParamPattern = Param LinPattern
+type ParamValueDef = Param ParamId
+
+data Param arg = Param ParamId [arg]
+ deriving (Eq,Ord,Show,Functor,Foldable,Traversable)
+
+type RecordRowType = RecordRow LinType
+type RecordRowValue = RecordRow LinValue
+type TableRowValue = TableRow LinValue
+
+data RecordRow rhs = RecordRow LabelId rhs
+ deriving (Eq,Ord,Show,Functor,Foldable,Traversable)
+data TableRow rhs = TableRow LinPattern rhs
+ deriving (Eq,Ord,Show,Functor,Foldable,Traversable)
+
+-- *** Identifiers in Concrete Syntax
+
+newtype PredefId = PredefId Id deriving (Eq,Ord,Show)
+newtype LabelId = LabelId Id deriving (Eq,Ord,Show)
+data VarValueId = VarValueId QualId deriving (Eq,Ord,Show)
+
+-- | Name of param type or param value
+newtype ParamId = ParamId QualId deriving (Eq,Ord,Show)
+
+--------------------------------------------------------------------------------
+-- ** Used in both Abstract and Concrete Syntax
+
+newtype ModId = ModId Id deriving (Eq,Ord,Show)
+
+newtype CatId = CatId Id deriving (Eq,Ord,Show)
+newtype FunId = FunId Id deriving (Eq,Show)
+
+data VarId = Anonymous | VarId Id deriving Show
+
+newtype Flags = Flags [(FlagName,FlagValue)] deriving Show
+type FlagName = Id
+data FlagValue = Str String | Int Int | Flt Double deriving Show
+
+
+-- *** Identifiers
+
+type Id = String
+data QualId = Qual ModId Id | Unqual Id deriving (Eq,Ord,Show)
+
+--------------------------------------------------------------------------------
+-- ** Pretty printing
+
+instance Pretty Grammar where
+ pp (Grammar abs cncs) = abs $+$ vcat cncs
+
+instance Pretty Abstract where
+ pp (Abstract m flags cats funs) =
+ "abstract" <+> m <+> "=" <+> "{" $$
+ flags $$
+ "cat" <+> fsep cats $$
+ "fun" <+> vcat funs $$
+ "}"
+
+instance Pretty CatDef where
+ pp (CatDef c cs) = hsep (c:cs)<>";"
+
+instance Pretty FunDef where
+ pp (FunDef f ty) = f <+> ":" <+> ty <>";"
+
+instance Pretty Type where
+ pp (Type bs ty) = sep (punctuate " ->" (map pp bs ++ [pp ty]))
+
+instance PPA Type where
+ ppA (Type [] (TypeApp c [])) = pp c
+ ppA t = parens t
+
+instance Pretty TypeBinding where
+ pp (TypeBinding Anonymous (Type [] tapp)) = pp tapp
+ pp (TypeBinding Anonymous ty) = parens ty
+ pp (TypeBinding (VarId x) ty) = parens (x<+>":"<+>ty)
+
+instance Pretty TypeApp where
+ pp (TypeApp c targs) = c<+>hsep (map ppA targs)
+
+instance Pretty VarId where
+ pp Anonymous = pp "_"
+ pp (VarId x) = pp x
+
+--------------------------------------------------------------------------------
+
+instance Pretty Concrete where
+ pp (Concrete cncid absid flags params lincats lins) =
+ "concrete" <+> cncid <+> "of" <+> absid <+> "=" <+> "{" $$
+ vcat params $$
+ section "lincat" lincats $$
+ section "lin" lins $$
+ "}"
+ where
+ section name [] = empty
+ section name ds = name <+> vcat (map (<> ";") ds)
+
+instance Pretty ParamDef where
+ pp (ParamDef p pvs) = hang ("param"<+> p <+> "=") 4 (punctuate " |" pvs)<>";"
+ pp (ParamAliasDef p t) = hang ("oper"<+> p <+> "=") 4 t<>";"
+
+instance PPA arg => Pretty (Param arg) where
+ pp (Param p ps) = pp p<+>sep (map ppA ps)
+
+instance PPA arg => PPA (Param arg) where
+ ppA (Param p []) = pp p
+ ppA pv = parens pv
+
+instance Pretty LincatDef where
+ pp (LincatDef c lt) = hang (c <+> "=") 4 lt
+
+instance Pretty LinType where
+ pp lt = case lt of
+ FloatType -> pp "Float"
+ IntType -> pp "Int"
+ ParamType pt -> pp pt
+ RecordType rs -> block rs
+ StrType -> pp "Str"
+ TableType pt lt -> sep [pt <+> "=>",pp lt]
+ TupleType lts -> "<"<>punctuate "," lts<>">"
+
+instance RhsSeparator LinType where rhsSep _ = pp ":"
+
+instance Pretty ParamType where
+ pp (ParamTypeId p) = pp p
+
+instance Pretty LinDef where
+ pp (LinDef f xs lv) = hang (f<+>hsep xs<+>"=") 4 lv
+
+instance Pretty LinValue where
+ pp lv = case lv of
+ ConcatValue v1 v2 -> sep [v1 <+> "++",pp v2]
+ ErrorValue s -> "Predef.error"<+>doubleQuotes s
+ ParamConstant pv -> pp pv
+ Projection lv l -> ppA lv<>"."<>l
+ Selection tv pv -> ppA tv<>"!"<>ppA pv
+ VariantValue vs -> "variants"<+>block vs
+ CommentedValue s v -> "{-" <+> s <+> "-}" $$ v
+ _ -> ppA lv
+
+instance PPA LinValue where
+ ppA lv = case lv of
+ LiteralValue l -> ppA l
+ ParamConstant pv -> ppA pv
+ PredefValue p -> ppA p
+ RecordValue [] -> pp "<>"
+ RecordValue rvs -> block rvs
+ PreValue alts def ->
+ "pre"<+>block (map alt alts++["_"<+>"=>"<+>def])
+ where
+ alt (ss,lv) = hang (hcat (punctuate "|" (map doubleQuotes ss)))
+ 2 ("=>"<+>lv)
+ TableValue _ tvs -> "table"<+>block tvs
+-- VTableValue t ts -> "table"<+>t<+>brackets (semiSep ts)
+ TupleValue lvs -> "<"<>punctuate "," lvs<>">"
+ VarValue v -> pp v
+ _ -> parens lv
+
+instance Pretty LinLiteral where pp = ppA
+
+instance PPA LinLiteral where
+ ppA l = case l of
+ FloatConstant f -> pp f
+ IntConstant n -> pp n
+ StrConstant s -> doubleQuotes s -- hmm
+
+instance RhsSeparator LinValue where rhsSep _ = pp "="
+
+instance Pretty LinPattern where
+ pp p =
+ case p of
+ ParamPattern pv -> pp pv
+ _ -> ppA p
+
+instance PPA LinPattern where
+ ppA p =
+ case p of
+ ParamPattern pv -> ppA pv
+ RecordPattern r -> block r
+ TuplePattern ps -> "<"<>punctuate "," ps<>">"
+ WildPattern -> pp "_"
+ _ -> parens p
+
+instance RhsSeparator LinPattern where rhsSep _ = pp "="
+
+instance RhsSeparator rhs => Pretty (RecordRow rhs) where
+ pp (RecordRow l v) = hang (l<+>rhsSep v) 2 v
+
+instance Pretty rhs => Pretty (TableRow rhs) where
+ pp (TableRow l v) = hang (l<+>"=>") 2 v
+
+--------------------------------------------------------------------------------
+instance Pretty ModId where pp (ModId s) = pp s
+instance Pretty CatId where pp (CatId s) = pp s
+instance Pretty FunId where pp (FunId s) = pp s
+instance Pretty LabelId where pp (LabelId s) = pp s
+instance Pretty PredefId where pp = ppA
+instance PPA PredefId where ppA (PredefId s) = "Predef."<>s
+instance Pretty ParamId where pp = ppA
+instance PPA ParamId where ppA (ParamId s) = pp s
+instance Pretty VarValueId where pp (VarValueId s) = pp s
+
+instance Pretty QualId where pp = ppA
+
+instance PPA QualId where
+ ppA (Qual m n) = m<>"_"<>n -- hmm
+ ppA (Unqual n) = pp n
+
+instance Pretty Flags where
+ pp (Flags []) = empty
+ pp (Flags flags) = "flags" <+> vcat (map ppFlag flags)
+ where
+ ppFlag (name,value) = name <+> "=" <+> value <>";"
+
+instance Pretty FlagValue where
+ pp (Str s) = pp s
+ pp (Int i) = pp i
+ pp (Flt d) = pp d
+
+--------------------------------------------------------------------------------
+-- | Pretty print atomically (i.e. wrap it in parentheses if necessary)
+class Pretty a => PPA a where ppA :: a -> Doc
+
+class Pretty rhs => RhsSeparator rhs where rhsSep :: rhs -> Doc
+
+semiSep xs = punctuate ";" xs
+block xs = braces (semiSep xs)
diff --git a/src/compiler/GF/Grammar/CanonicalJSON.hs b/src/compiler/GF/Grammar/CanonicalJSON.hs
new file mode 100644
index 000000000..8b3464674
--- /dev/null
+++ b/src/compiler/GF/Grammar/CanonicalJSON.hs
@@ -0,0 +1,289 @@
+module GF.Grammar.CanonicalJSON (
+ encodeJSON
+ ) where
+
+import Text.JSON
+import Control.Applicative ((<|>))
+import Data.Ratio (denominator, numerator)
+import GF.Grammar.Canonical
+
+
+encodeJSON :: FilePath -> Grammar -> IO ()
+encodeJSON fpath g = writeFile fpath (encode g)
+
+
+-- in general we encode grammars using JSON objects/records,
+-- except for newtypes/coercions/direct values
+
+-- the top-level definitions use normal record labels,
+-- but recursive types/values/ids use labels starting with a "."
+
+instance JSON Grammar where
+ showJSON (Grammar abs cncs) = makeObj [("abstract", showJSON abs), ("concretes", showJSON cncs)]
+
+ readJSON o = Grammar <$> o!"abstract" <*> o!"concretes"
+
+
+--------------------------------------------------------------------------------
+-- ** Abstract Syntax
+
+instance JSON Abstract where
+ showJSON (Abstract absid flags cats funs)
+ = makeObj [("abs", showJSON absid),
+ ("flags", showJSON flags),
+ ("cats", showJSON cats),
+ ("funs", showJSON funs)]
+
+ readJSON o = Abstract
+ <$> o!"abs"
+ <*>(o!"flags" <|> return (Flags []))
+ <*> o!"cats"
+ <*> o!"funs"
+
+instance JSON CatDef where
+ -- non-dependent categories are encoded as simple strings:
+ showJSON (CatDef c []) = showJSON c
+ showJSON (CatDef c cs) = makeObj [("cat", showJSON c), ("args", showJSON cs)]
+
+ readJSON o = CatDef <$> readJSON o <*> return []
+ <|> CatDef <$> o!"cat" <*> o!"args"
+
+instance JSON FunDef where
+ showJSON (FunDef f ty) = makeObj [("fun", showJSON f), ("type", showJSON ty)]
+
+ readJSON o = FunDef <$> o!"fun" <*> o!"type"
+
+instance JSON Type where
+ showJSON (Type bs ty) = makeObj [(".args", showJSON bs), (".result", showJSON ty)]
+
+ readJSON o = Type <$> o!".args" <*> o!".result"
+
+instance JSON TypeApp where
+ -- non-dependent categories are encoded as simple strings:
+ showJSON (TypeApp c []) = showJSON c
+ showJSON (TypeApp c args) = makeObj [(".cat", showJSON c), (".args", showJSON args)]
+
+ readJSON o = TypeApp <$> readJSON o <*> return []
+ <|> TypeApp <$> o!".cat" <*> o!".args"
+
+instance JSON TypeBinding where
+ -- non-dependent categories are encoded as simple strings:
+ showJSON (TypeBinding Anonymous (Type [] (TypeApp c []))) = showJSON c
+ showJSON (TypeBinding x ty) = makeObj [(".var", showJSON x), (".type", showJSON ty)]
+
+ readJSON o = do c <- readJSON o
+ return (TypeBinding Anonymous (Type [] (TypeApp c [])))
+ <|> TypeBinding <$> o!".var" <*> o!".type"
+
+
+--------------------------------------------------------------------------------
+-- ** Concrete syntax
+
+instance JSON Concrete where
+ showJSON (Concrete cncid absid flags params lincats lins)
+ = makeObj [("cnc", showJSON cncid),
+ ("abs", showJSON absid),
+ ("flags", showJSON flags),
+ ("params", showJSON params),
+ ("lincats", showJSON lincats),
+ ("lins", showJSON lins)]
+
+ readJSON o = Concrete
+ <$> o!"cnc"
+ <*> o!"abs"
+ <*>(o!"flags" <|> return (Flags []))
+ <*> o!"params"
+ <*> o!"lincats"
+ <*> o!"lins"
+
+instance JSON ParamDef where
+ showJSON (ParamDef p pvs) = makeObj [("param", showJSON p), ("values", showJSON pvs)]
+ showJSON (ParamAliasDef p t) = makeObj [("param", showJSON p), ("alias", showJSON t)]
+
+ readJSON o = ParamDef <$> o!"param" <*> o!"values"
+ <|> ParamAliasDef <$> o!"param" <*> o!"alias"
+
+instance JSON LincatDef where
+ showJSON (LincatDef c lt) = makeObj [("cat", showJSON c), ("lintype", showJSON lt)]
+
+ readJSON o = LincatDef <$> o!"cat" <*> o!"lintype"
+
+instance JSON LinDef where
+ showJSON (LinDef f xs lv) = makeObj [("fun", showJSON f), ("args", showJSON xs), ("lin", showJSON lv)]
+
+ readJSON o = LinDef <$> o!"fun" <*> o!"args" <*> o!"lin"
+
+instance JSON LinType where
+ -- the basic types (Str, Float, Int) are encoded as strings:
+ showJSON (StrType) = showJSON "Str"
+ showJSON (FloatType) = showJSON "Float"
+ showJSON (IntType) = showJSON "Int"
+ -- parameters are also encoded as strings:
+ showJSON (ParamType pt) = showJSON pt
+ -- tables/tuples are encoded as JSON objects:
+ showJSON (TableType pt lt) = makeObj [(".tblarg", showJSON pt), (".tblval", showJSON lt)]
+ showJSON (TupleType lts) = makeObj [(".tuple", showJSON lts)]
+ -- records are encoded as records:
+ showJSON (RecordType rows) = showJSON rows
+
+ readJSON o = do "Str" <- readJSON o; return StrType
+ <|> do "Float" <- readJSON o; return FloatType
+ <|> do "Int" <- readJSON o; return IntType
+ <|> do ptype <- readJSON o; return (ParamType ptype)
+ <|> TableType <$> o!".tblarg" <*> o!".tblval"
+ <|> TupleType <$> o!".tuple"
+ <|> RecordType <$> readJSON o
+
+instance JSON LinValue where
+ showJSON (LiteralValue l ) = showJSON l
+ -- most values are encoded as JSON objects:
+ showJSON (ParamConstant pv) = makeObj [(".param", showJSON pv)]
+ showJSON (PredefValue p ) = makeObj [(".predef", showJSON p)]
+ showJSON (TableValue t tvs) = makeObj [(".tblarg", showJSON t), (".tblrows", showJSON tvs)]
+ showJSON (TupleValue lvs) = makeObj [(".tuple", showJSON lvs)]
+ showJSON (VarValue v ) = makeObj [(".var", showJSON v)]
+ showJSON (ErrorValue s ) = makeObj [(".error", showJSON s)]
+ showJSON (Projection lv l ) = makeObj [(".project", showJSON lv), (".label", showJSON l)]
+ showJSON (Selection tv pv) = makeObj [(".select", showJSON tv), (".key", showJSON pv)]
+ showJSON (VariantValue vs) = makeObj [(".variants", showJSON vs)]
+ showJSON (PreValue pre def) = makeObj [(".pre", showJSON pre),(".default", showJSON def)]
+ -- records are encoded directly as JSON records:
+ showJSON (RecordValue rows) = showJSON rows
+ -- concatenation is encoded as a JSON array:
+ showJSON v@(ConcatValue _ _) = showJSON (flatten v [])
+ where flatten (ConcatValue v v') = flatten v . flatten v'
+ flatten v = (v :)
+
+ readJSON o = LiteralValue <$> readJSON o
+ <|> ParamConstant <$> o!".param"
+ <|> PredefValue <$> o!".predef"
+ <|> TableValue <$> o!".tblarg" <*> o!".tblrows"
+ <|> TupleValue <$> o!".tuple"
+ <|> VarValue <$> o!".var"
+ <|> ErrorValue <$> o!".error"
+ <|> Projection <$> o!".project" <*> o!".label"
+ <|> Selection <$> o!".select" <*> o!".key"
+ <|> VariantValue <$> o!".variants"
+ <|> PreValue <$> o!".pre" <*> o!".default"
+ <|> RecordValue <$> readJSON o
+ <|> do vs <- readJSON o :: Result [LinValue]
+ return (foldr1 ConcatValue vs)
+
+instance JSON LinLiteral where
+ -- basic values (Str, Float, Int) are encoded as JSON strings/numbers:
+ showJSON (StrConstant s) = showJSON s
+ showJSON (FloatConstant f) = showJSON f
+ showJSON (IntConstant n) = showJSON n
+
+ readJSON = readBasicJSON StrConstant IntConstant FloatConstant
+
+instance JSON LinPattern where
+ -- wildcards and patterns without arguments are encoded as strings:
+ showJSON (WildPattern) = showJSON "_"
+ showJSON (ParamPattern (Param p [])) = showJSON p
+ -- complex patterns are encoded as JSON objects:
+ showJSON (ParamPattern pv) = showJSON pv
+ -- and records as records:
+ showJSON (RecordPattern r) = showJSON r
+
+ readJSON o = do "_" <- readJSON o; return WildPattern
+ <|> do p <- readJSON o; return (ParamPattern (Param p []))
+ <|> ParamPattern <$> readJSON o
+ <|> RecordPattern <$> readJSON o
+
+instance JSON arg => JSON (Param arg) where
+ -- parameters without arguments are encoded as strings:
+ showJSON (Param p []) = showJSON p
+ showJSON (Param p args) = makeObj [(".paramid", showJSON p), (".args", showJSON args)]
+
+ readJSON o = Param <$> readJSON o <*> return []
+ <|> Param <$> o!".paramid" <*> o!".args"
+
+instance JSON a => JSON (RecordRow a) where
+ -- record rows and lists of record rows are both encoded as JSON records (i.e., objects)
+ showJSON row = showJSONs [row]
+ showJSONs rows = makeObj (map toRow rows)
+ where toRow (RecordRow (LabelId lbl) val) = (lbl, showJSON val)
+
+ readJSON obj = head <$> readJSONs obj
+ readJSONs obj = mapM fromRow (assocsJSObject obj)
+ where fromRow (lbl, jsvalue) = do value <- readJSON jsvalue
+ return (RecordRow (LabelId lbl) value)
+
+instance JSON rhs => JSON (TableRow rhs) where
+ showJSON (TableRow l v) = makeObj [(".pattern", showJSON l), (".value", showJSON v)]
+
+ readJSON o = TableRow <$> o!".pattern" <*> o!".value"
+
+
+-- *** Identifiers in Concrete Syntax
+
+instance JSON PredefId where showJSON (PredefId s) = showJSON s ; readJSON = fmap PredefId . readJSON
+instance JSON LabelId where showJSON (LabelId s) = showJSON s ; readJSON = fmap LabelId . readJSON
+instance JSON VarValueId where showJSON (VarValueId s) = showJSON s ; readJSON = fmap VarValueId . readJSON
+instance JSON ParamId where showJSON (ParamId s) = showJSON s ; readJSON = fmap ParamId . readJSON
+instance JSON ParamType where showJSON (ParamTypeId s) = showJSON s ; readJSON = fmap ParamTypeId . readJSON
+
+
+--------------------------------------------------------------------------------
+-- ** Used in both Abstract and Concrete Syntax
+
+instance JSON ModId where showJSON (ModId s) = showJSON s ; readJSON = fmap ModId . readJSON
+instance JSON CatId where showJSON (CatId s) = showJSON s ; readJSON = fmap CatId . readJSON
+instance JSON FunId where showJSON (FunId s) = showJSON s ; readJSON = fmap FunId . readJSON
+
+instance JSON VarId where
+ -- the anonymous variable is the underscore:
+ showJSON Anonymous = showJSON "_"
+ showJSON (VarId x) = showJSON x
+
+ readJSON o = do "_" <- readJSON o; return Anonymous
+ <|> VarId <$> readJSON o
+
+instance JSON QualId where
+ showJSON (Qual (ModId m) n) = showJSON (m++"."++n)
+ showJSON (Unqual n) = showJSON n
+
+ readJSON o = do qualid <- readJSON o
+ let (mod, id) = span (/= '.') qualid
+ return $ if null mod then Unqual id else Qual (ModId mod) id
+
+instance JSON Flags where
+ -- flags are encoded directly as JSON records (i.e., objects):
+ showJSON (Flags fs) = makeObj [(f, showJSON v) | (f, v) <- fs]
+
+ readJSON obj = Flags <$> mapM fromRow (assocsJSObject obj)
+ where fromRow (lbl, jsvalue) = do value <- readJSON jsvalue
+ return (lbl, value)
+
+instance JSON FlagValue where
+ -- flag values are encoded as basic JSON types:
+ showJSON (Str s) = showJSON s
+ showJSON (Int i) = showJSON i
+ showJSON (Flt f) = showJSON f
+
+ readJSON = readBasicJSON Str Int Flt
+
+
+--------------------------------------------------------------------------------
+-- ** Convenience functions
+
+(!) :: JSON a => JSValue -> String -> Result a
+obj ! key = maybe (fail $ "CanonicalJSON.(!): Could not find key: " ++ show key)
+ readJSON
+ (lookup key (assocsJSObject obj))
+
+assocsJSObject :: JSValue -> [(String, JSValue)]
+assocsJSObject (JSObject o) = fromJSObject o
+assocsJSObject (JSArray _) = fail $ "CanonicalJSON.assocsJSObject: Expected a JSON object, found an Array"
+assocsJSObject jsvalue = fail $ "CanonicalJSON.assocsJSObject: Expected a JSON object, found " ++ show jsvalue
+
+
+readBasicJSON :: (JSON int, Integral int, JSON flt, RealFloat flt) =>
+ (String -> v) -> (int -> v) -> (flt -> v) -> JSValue -> Result v
+readBasicJSON str int flt o
+ = str <$> readJSON o
+ <|> int_or_flt <$> readJSON o
+ where int_or_flt f | f == fromIntegral n = int n
+ | otherwise = flt f
+ where n = round f
diff --git a/src/compiler/GF/Grammar/Printer.hs b/src/compiler/GF/Grammar/Printer.hs
index dcd419c42..58892db11 100644
--- a/src/compiler/GF/Grammar/Printer.hs
+++ b/src/compiler/GF/Grammar/Printer.hs
@@ -208,7 +208,7 @@ ppTerm q d (S x y) = case x of
ppTerm q d (ExtR x y) = prec d 3 (ppTerm q 3 x <+> "**" <+> ppTerm q 4 y)
ppTerm q d (App x y) = prec d 4 (ppTerm q 4 x <+> ppTerm q 5 y)
ppTerm q d (V e es) = hang "table" 2 (sep [ppTerm q 6 e,brackets (fsep (punctuate ';' (map (ppTerm q 0) es)))])
-ppTerm q d (FV es) = "variants" <+> braces (fsep (punctuate ';' (map (ppTerm q 0) es)))
+ppTerm q d (FV es) = prec d 4 ("variants" <+> braces (fsep (punctuate ';' (map (ppTerm q 0) es))))
ppTerm q d (AdHocOverload es) = "overload" <+> braces (fsep (punctuate ';' (map (ppTerm q 0) es)))
ppTerm q d (Alts e xs) = prec d 4 ("pre" <+> braces (ppTerm q 0 e <> ';' <+> fsep (punctuate ';' (map (ppAltern q) xs))))
ppTerm q d (Strs es) = "strs" <+> braces (fsep (punctuate ';' (map (ppTerm q 0) es)))
diff --git a/src/compiler/GF/Haskell.hs b/src/compiler/GF/Haskell.hs
index 57601c1d5..8cb8a9177 100644
--- a/src/compiler/GF/Haskell.hs
+++ b/src/compiler/GF/Haskell.hs
@@ -40,6 +40,9 @@ tvar = TId
tcon0 = TId
tcon c = foldl TAp (TId c)
+lets [] e = e
+lets ds e = Lets ds e
+
let1 x xe e = Lets [(x,xe)] e
single x = List [x]
@@ -113,7 +116,8 @@ instance Pretty Exp where
Op e1 op e2 -> hang (ppB e1<+>op) 2 (ppB e2)
Lets bs e -> sep ["let"<+>vcat [hang (x<+>"=") 2 xe|(x,xe)<-bs],
"in" <+>e]
- LambdaCase alts -> hang "\\case" 4 (vcat [p<+>"->"<+>e|(p,e)<-alts])
+ LambdaCase alts ->
+ hang "\\case" 2 (vcat [hang (p<+>"->") 2 e|(p,e)<-alts])
_ -> ppB e
ppB e = case flatAp e of f:as -> hang (ppA f) 2 (sep (map ppA as))
diff --git a/src/compiler/GF/Infra/Option.hs b/src/compiler/GF/Infra/Option.hs
index 61ccd8f80..f30ee79c9 100644
--- a/src/compiler/GF/Infra/Option.hs
+++ b/src/compiler/GF/Infra/Option.hs
@@ -83,7 +83,10 @@ data Phase = Preproc | Convert | Compile | Link
deriving (Show,Eq,Ord)
data OutputFormat = FmtPGFPretty
+ | FmtCanonicalGF
+ | FmtCanonicalJson
| FmtJavaScript
+ | FmtJSON
| FmtPython
| FmtHaskell
| FmtJava
@@ -318,7 +321,8 @@ optDescr =
Option [] ["gfo-dir"] (ReqArg gfoDir "DIR") "Directory to put .gfo files in (default = '.').",
Option ['f'] ["output-format"] (ReqArg outFmt "FMT")
(unlines ["Output format. FMT can be one of:",
- "Multiple concrete: pgf (default), js, pgf_pretty, prolog, python, ...", -- gar,
+ "Canonical GF grammar: canonical_gf, canonical_json, (and haskell with option --haskell=concrete)",
+ "Multiple concrete: pgf (default), json, js, pgf_pretty, prolog, python, ...", -- gar,
"Single concrete only: bnf, ebnf, fa, gsl, jsgf, regexp, slf, srgs_xml, srgs_abnf, vxml, ....", -- cf, lbnf,
"Abstract only: haskell, ..."]), -- prolog_abs,
Option [] ["sisr"] (ReqArg sisrFmt "FMT")
@@ -366,8 +370,6 @@ optDescr =
"Enable or disable global grammar optimization. This could significantly reduce the size of the final PGF file",
Option [] ["split-pgf"] (NoArg (splitPGF True))
"Split the PGF into one file per language. This allows the runtime to load only individual languages",
- Option [] ["stem"] (onOff (toggleOptimize OptStem) True) "Perform stem-suffix analysis (default on).",
- Option [] ["cse"] (onOff (toggleOptimize OptCSE) True) "Perform common sub-expression elimination (default on).",
Option [] ["cfg"] (ReqArg cfgTransform "TRANS") "Enable or disable specific CFG transformations. TRANS = merge, no-merge, bottomup, no-bottomup, ...",
Option [] ["heuristic_search_factor"] (ReqArg (readDouble (\d o -> o { optHeuristicFactor = Just d })) "FACTOR") "Set the heuristic search factor for statistical parsing",
Option [] ["case_sensitive"] (onOff (\v -> set $ \o -> o{optCaseSensitive=v}) True) "Set the parser in case-sensitive/insensitive mode [sensitive by default]",
@@ -441,8 +443,6 @@ optDescr =
optimize_pgf x = set $ \o -> o { optOptimizePGF = x }
splitPGF x = set $ \o -> o { optSplitPGF = x }
- toggleOptimize x b = set $ setOptimization' x b
-
cfgTransform x = let (x', b) = case x of
'n':'o':'-':rest -> (rest, False)
_ -> (x, True)
@@ -465,7 +465,10 @@ outputFormats = map fst outputFormatsExpl
outputFormatsExpl :: [((String,OutputFormat),String)]
outputFormatsExpl =
[(("pgf_pretty", FmtPGFPretty),"human-readable pgf"),
+ (("canonical_gf", FmtCanonicalGF),"Canonical GF source files"),
+ (("canonical_json", FmtCanonicalJson),"Canonical JSON source files"),
(("js", FmtJavaScript),"JavaScript (whole grammar)"),
+ (("json", FmtJSON),"JSON (whole grammar)"),
(("python", FmtPython),"Python (whole grammar)"),
(("haskell", FmtHaskell),"Haskell (abstract syntax)"),
(("java", FmtJava),"Java (abstract syntax)"),
diff --git a/src/compiler/GF/Main.hs b/src/compiler/GF/Main.hs
index 24f7b78f1..c853aa21b 100644
--- a/src/compiler/GF/Main.hs
+++ b/src/compiler/GF/Main.hs
@@ -20,7 +20,7 @@ import GF.System.Console (setConsoleEncoding)
-- Run @gf --help@ for usage info.
main :: IO ()
main = do
- setConsoleEncoding
+ --setConsoleEncoding
uncurry mainOpts =<< getOptions
-- | Get and parse GF command line arguments. Fix relative paths.
diff --git a/src/compiler/GF/Speech/PGFToCFG.hs b/src/compiler/GF/Speech/PGFToCFG.hs
index bc9df2abc..188981fc9 100644
--- a/src/compiler/GF/Speech/PGFToCFG.hs
+++ b/src/compiler/GF/Speech/PGFToCFG.hs
@@ -6,8 +6,8 @@
----------------------------------------------------------------------
module GF.Speech.PGFToCFG (bnfPrinter, pgfToCFG) where
-import PGF
-import PGF.Internal
+import PGF2
+import PGF2.Internal
import GF.Grammar.CFG hiding (Symbol)
import Data.Map (Map)
@@ -16,28 +16,25 @@ import qualified Data.IntMap as IntMap
import Data.Set (Set)
import qualified Data.Set as Set
-bnfPrinter :: PGF -> CId -> String
+bnfPrinter :: PGF -> Concr -> String
bnfPrinter = toBNF id
-toBNF :: (CFG -> CFG) -> PGF -> CId -> String
+toBNF :: (CFG -> CFG) -> PGF -> Concr -> String
toBNF f pgf cnc = prCFG $ f $ pgfToCFG pgf cnc
type Profile = [Int]
-pgfToCFG :: PGF
- -> CId -- ^ Concrete syntax name
- -> CFG
-pgfToCFG pgf lang = mkCFG (showCId start_cat) extCats (startRules ++ concatMap ruleToCFRule rules)
+pgfToCFG :: PGF -> Concr -> CFG
+pgfToCFG pgf cnc = mkCFG start_cat extCats (startRules ++ concatMap ruleToCFRule rules)
where
(_,start_cat,_) = unType (startCat pgf)
- cnc = lookConcr pgf lang
rules :: [(FId,Production)]
rules = [(fcat,prod) | fcat <- [0..concrTotalCats cnc],
prod <- concrProductions cnc fcat]
fcatCats :: Map FId Cat
- fcatCats = Map.fromList [(fc, showCId c ++ "_" ++ show i)
+ fcatCats = Map.fromList [(fc, c ++ "_" ++ show i)
| (c,s,e,lbls) <- concrCategories cnc,
(fc,i) <- zip [s..e] [1..]]
@@ -64,7 +61,7 @@ pgfToCFG pgf lang = mkCFG (showCId start_cat) extCats (startRules ++ concatMap r
extCats = Set.fromList $ map ruleLhs startRules
startRules :: [CFRule]
- startRules = [Rule (showCId c) [NonTerminal (fcatToCat fc r)] (CFRes 0)
+ startRules = [Rule c [NonTerminal (fcatToCat fc r)] (CFRes 0)
| (c,s,e,lbls) <- concrCategories cnc,
fc <- [s..e], not (isPredefFId fc),
r <- [0..catLinArity fc-1]]
@@ -113,7 +110,7 @@ pgfToCFG pgf lang = mkCFG (showCId start_cat) extCats (startRules ++ concatMap r
where Just (hypos,_,_) = fmap unType (functionType pgf f)
argTypes = [cat | (_,_,ty) <- hypos, let (_,cat,_) = unType ty]
- profileToTerm :: CId -> Profile -> CFTerm
+ profileToTerm :: Fun -> Profile -> CFTerm
profileToTerm t [] = CFMeta t
profileToTerm _ xs = CFRes (last xs) -- FIXME: unify
ruleToCFRule (c,PCoerce c') =
diff --git a/src/compiler/GF/System/Directory.hs b/src/compiler/GF/System/Directory.hs
index 898646063..be91e758e 100644
--- a/src/compiler/GF/System/Directory.hs
+++ b/src/compiler/GF/System/Directory.hs
@@ -8,13 +8,13 @@ import System.Directory as D
doesDirectoryExist,doesFileExist,getModificationTime,
getCurrentDirectory,getDirectoryContents,getPermissions,
removeFile,renameFile)
-import Data.Time.Compat
+--import Data.Time.Compat
canonicalizePath path = liftIO $ D.canonicalizePath path
createDirectoryIfMissing b = liftIO . D.createDirectoryIfMissing b
doesDirectoryExist path = liftIO $ D.doesDirectoryExist path
doesFileExist path = liftIO $ D.doesFileExist path
-getModificationTime path = liftIO $ fmap toUTCTime (D.getModificationTime path)
+getModificationTime path = liftIO $ {-fmap toUTCTime-} (D.getModificationTime path)
getDirectoryContents path = liftIO $ D.getDirectoryContents path
getCurrentDirectory :: MonadIO io => io FilePath
diff --git a/src/compiler/GF/Text/Pretty.hs b/src/compiler/GF/Text/Pretty.hs
index 29ca7f131..5c87ea6a3 100644
--- a/src/compiler/GF/Text/Pretty.hs
+++ b/src/compiler/GF/Text/Pretty.hs
@@ -20,6 +20,7 @@ instance Pretty a => Pretty [a] where
ppList = fsep . map pp -- hmm
render x = PP.render (pp x)
+render80 x = renderStyle style{lineLength=80,ribbonsPerLine=1} x
renderStyle s x = PP.renderStyle s (pp x)
infixl 5 $$,$+$
diff --git a/src/example-based/ExampleDemo.hs b/src/example-based/ExampleDemo.hs
deleted file mode 100644
index fe4eb501d..000000000
--- a/src/example-based/ExampleDemo.hs
+++ /dev/null
@@ -1,553 +0,0 @@
-module ExampleDemo (Environ,initial,getNext, provideExample, testThis,mkFuncWithArg,searchGoodTree,isMeta)
- where
-
-import PGF
---import System.IO
-import Data.List
---import Control.Monad
-import qualified Data.Map as Map
---import qualified Data.IntMap as IntMap
-import qualified Data.Set as Set
-import Data.Maybe
---import System.Environment (getArgs)
-import System.Random (RandomGen) --newStdGen
-
-
-type MyType = CId -- name of the categories from the program
-type ConcType = CId -- categories from the resource grammar, that we parse on
-type MyFunc = CId -- functions that we need to implement
---type FuncWithArg = ((MyFunc, MyType), Expr) -- function with arguments
-type InterInstr = [String] -- lincats that were generated but not written to the file
-
-
-
-data FuncWithArg = FuncWithArg
- {getName :: MyFunc, -- name of the function to generate
- getType :: MyType, -- return type of the function
- getTypeArgs :: [MyType] -- types of arguments
- }
- deriving (Show,Eq,Ord)
-
--- we assume that it's for English for the moment
-
-
-type TypeMap = Map.Map MyType ConcType -- mapping found from a file
-
-type ConcMap = Map.Map MyFunc Expr -- concrete expression after parsing
-
-data Environ = Env {getTypeMap :: TypeMap, -- mapping between a category in the grammar and a concrete type from RGL
- getConcMap :: ConcMap, -- concrete expression after parsing
- getSigs :: Map.Map MyType [FuncWithArg], -- functions for which we have the concrete syntax already with args
- getAll :: [FuncWithArg] -- all the functions with arguments
- }
-
-
-getNext :: Environ -> Environ -> ([MyFunc],[MyFunc])
-getNext env example_env =
- let sgs = getSigs env
- allfuncs = getAll env
- names = Set.fromList $ map getName $ concat $ Map.elems sgs
- exampleable = filter (\x -> (isJust $ getNameExpr x env)
- &&
- (not $ Set.member x names) -- maybe drop this if you want to also rewrite from examples...
- ) $ map getName allfuncs
- testeable = filter (\x -> (isJust $ getNameExpr x env )
- &&
- (Set.member x names)
- ) $ map getName allfuncs
-
- in (exampleable,testeable)
-
-
-provideExample :: RandomGen gen => gen -> Environ -> MyFunc -> PGF -> PGF -> Language -> Maybe (Expr,String)
-provideExample gen env myfunc parsePGF pgfFile lang =
- fmap giveExample $ getNameExpr myfunc env
- where
- giveExample e_ =
- let newexpr = head $ generateRandomFromDepth gen pgfFile e_ (Just 5) -- change here with the new random generator
- ty = getType $ head $ filter (\x -> getName x == myfunc) $ getAll env
- embeddedExpr = maybe "" (\x -> ", as in: " ++ q (linearize pgfFile lang x)) (embedInStart (getAll env) (Map.fromList [(ty,e_)]))
- lexpr = linearize pgfFile lang newexpr
- q s = sq++s++sq
- sq = "\""
- in (newexpr,q lexpr ++ embeddedExpr)
--- question, you need the IO monad for the random generator, how to do otherwise ??
--- question can you make the expression bold/italic - somehow distinguishable from the rest ?
-
-
-
-testThis :: Environ -> MyFunc -> PGF -> Language -> Maybe String
-testThis env myfunc parsePGF lang =
- fmap (linearize parsePGF lang . mapToResource env . llin env) $
- getNameExpr myfunc env
-
-
--- we assume that even the functions linearized by the user will still be in getSigs along with their linearization
-
-
--- fill in the blancs of an expression that we want to linearize for testing purposes
----------------------------------------------------------------------------
-
-llin :: Environ -> Expr -> Expr
-llin env expr =
- let
- (id,args) = fromJust $ unApp expr
- --cexpr = fromJust $ Map.lookup id (getConcMap env)
- in
- if any isMeta args
- then let
- sigs = concat $ Map.elems $ getSigs env
- tys = findExprWhich sigs id
- in replaceConcArg 1 tys expr env
- else mkApp id $ map (llin env) args
-
-
--- argument of the meta variable to replace, list of arguments left, expression to replace, environment, current replace expression
-replaceConcArg :: Int -> [MyType] -> Expr -> Environ -> Expr
-replaceConcArg i [] expr env = expr
-replaceConcArg i (t:ts) expr env = -- TO DO : insert randomness here !!
- let ss = fromJust $ Map.lookup t $ getSigs env
- args = filter (null . getTypeArgs) ss
- finArg = if null args then let l = last ss in llin env (mkApp (getName l) [mkMeta j | j <- [1..(length $ getTypeArgs l)]])
- else mkApp (getName $ last args) []
- in
- let newe = replaceOne i finArg expr
- in replaceConcArg (i+1) ts newe env
-
--- replace a certain metavariable with a certain expression in another expression - return updated expression
-replaceOne :: Int -> Expr -> Expr -> Expr
-replaceOne i erep expr =
- if isMeta expr && ((fromJust $ unMeta expr) == i)
- then erep
- else if isMeta expr then expr
- else let (id,args) = fromJust $ unApp expr
- in
- mkApp id $ map (replaceOne i erep) args
-
-
-findExprWhich :: [FuncWithArg] -> MyFunc -> [MyType]
-findExprWhich lst f = getTypeArgs $ head $ filter (\x -> getName x == f) lst
-
-
-mapToResource :: Environ -> Expr -> Expr
-mapToResource env expr =
- let (id,args) = maybe (error $ "tried to unwrap " ++ showExpr [] expr) (\x -> x) (unApp expr)
- cmap = getConcMap env
- cexp = maybe (error $ "didn't find " ++ showCId id ++ " in "++ show cmap) (\x -> x) (Map.lookup id cmap)
- in
- if null args then cexp
- else let newargs = map (mapToResource env) args
- in replaceAllArgs cexp 1 newargs
- where
- replaceAllArgs expr i [] = expr
- replaceAllArgs expr i (x:xs) = replaceAllArgs (replaceOne i x expr) (i+1) xs
-
-
-
------------------------------------------------
-
--- embed expression in another one from the start category
-
-embedInStart :: [FuncWithArg] -> Map.Map MyType Expr -> Maybe Expr
-embedInStart fss cs =
- let currset = Map.toList cs
- nextset = Map.fromList $ concat [ if elem myt (getTypeArgs farg)
- then connectWithArg (myt,exp) farg else []
- | (myt,exp) <- currset, farg <- fss]
- nextmap = Map.union cs nextset
- maybeExpr = Map.lookup startCateg nextset
- in if isNothing maybeExpr then
- if Map.size nextmap == Map.size cs then Nothing --error $ "could't build " ++ show startCateg ++ "with " ++ show fss
- else embedInStart fss nextmap
- else return $ fromJust maybeExpr
- where
- connectWithArg (myt,exp) farg =
- let ind = head $ elemIndices myt (getTypeArgs farg)
- in [(getType farg, mkApp (getName farg) $ [mkMeta i | i <- [1..ind]] ++ [exp] ++ [mkMeta i | i <- [(ind + 1)..((length $ getTypeArgs farg) - 1)]])]
-
-
-
-
-
------------------------------------------------
-{-
-updateConcMap :: Environ -> MyFunc -> Expr -> Environ
-updateConcMap env myf expr =
- Env (getTypeMap env) (Map.insert myf expr (getConcMap env)) (getSigs env) (getAll env)
-
-
-updateInterInstr :: Environ -> MyType -> FuncWithArg -> Environ
-updateInterInstr env myt myf =
- let ii = getSigs env
- newInterInstr =
- maybe (Map.insert myt [myf] ii) (\x -> Map.insert myt (myf:x) ii) $ Map.lookup myt ii
- in Env (getTypeMap env) (getConcMap env) newInterInstr (getAll env)
-
-
-putSignatures :: Environ -> [FuncWithArg] -> Environ
-putSignatures env fss =
- Env (getTypeMap env) (getConcMap env) (mkSigs fss) (getAll env)
-
-
-updateEnv :: Environ -> FuncWithArg -> MyType -> Expr -> Environ
-updateEnv env myf myt expr =
- let ii = getSigs env
- nn = getName myf
- newInterInstr =
- maybe (Map.insert myt [myf] ii) (\x -> Map.insert myt (myf:x) ii) $ Map.lookup myt ii
- in Env (getTypeMap env) (Map.insert nn expr (getConcMap env)) newInterInstr (getAll env)
--}
-
-mkSigs :: [FuncWithArg] -> Map.Map MyType [FuncWithArg]
-mkSigs fss = Map.fromListWith (++) $ zip (map getType fss) (map (\x -> [x]) fss)
-
-
-
-{------------------------------------
-lang :: String
-lang = "Eng"
-
-
-parseLang :: Language
-parseLang = fromJust $ readLanguage "ParseEng"
-
-
-parsePGFfile :: String
-parsePGFfile = "ParseEngAbs.pgf"
-------------------------------------}
-
-
-
-
-
-searchGoodTree :: Environ -> Expr -> [Expr] -> IO (Maybe (Expr,Expr))
-searchGoodTree env expr [] = return Nothing
-searchGoodTree env expr (e:es) =
- do val <- debugReplaceArgs expr e env
- maybe (searchGoodTree env expr es) (\x -> return $ Just (x,e)) val
-
-
-
-getNameExpr :: MyFunc -> Environ -> Maybe Expr
-getNameExpr myfunc env =
- let allfunc = filter (\x -> getName x == myfunc) $ getAll env
- in
- if null allfunc then Nothing
- else getExpr (head allfunc) env
-
--- find an expression to generate where we have all the other elements available
-getExpr :: FuncWithArg -> Environ -> Maybe Expr
-getExpr farg env =
- let tys = getTypeArgs farg
- ctx = getSigs env
- lst = getConcTypes ctx tys 1
- in if (all isJust lst) then Just $ mkApp (getName farg) (map fromJust lst)
- else Nothing
- where getConcTypes context [] i = []
- getConcTypes context (ty:types) i =
- let pos = Map.lookup ty context
- in
- if isNothing pos || (null $ fromJust pos) then [Nothing]
- else
- let mm = last $ fromJust pos
- mmargs = getTypeArgs mm
- newi = i + length mmargs - 1
- lst = getConcTypes (Map.insert ty (init $ (fromJust pos)) context) types (newi+1)
- in
- if (all isJust lst) then -- i..newi
- (Just $ mkApp (getName mm) [mkMeta j | j <- [1..(length mmargs)]]) : lst
- else [Nothing]
-
-
-
-
-
--- only covers simple expressions with meta variables, not the rest...
-isGeneralizationOf :: Expr -> Expr -> Bool
-isGeneralizationOf genExpr testExpr =
- if isMeta genExpr then True
- else if isMeta testExpr then False
- else let genUnwrap = unApp genExpr
- testUnwrap = unApp testExpr
- in if isNothing genUnwrap || isNothing testUnwrap then False -- see if you can generalize here
- else let (gencid, genargs) = fromJust genUnwrap
- (testcid, testargs) = fromJust testUnwrap
- in
- (gencid == testcid) && (length genargs == length testargs)
- && (and [isGeneralizationOf g t | (g,t) <- (zip genargs testargs)])
-
-{-do lst <- getConcTypes context types (i+1)
- return $ mkMeta i : lst -}
-
-debugReplaceArgs :: Expr -> Expr -> Environ -> IO (Maybe Expr)
-debugReplaceArgs aexpr cexpr env =
- if isNothing $ unApp aexpr then return Nothing
- else if any isNothing $ map unApp $ snd $ fromJust $ unApp aexpr then return Nothing
- else
- let args = map (fst.fromJust.unApp) $ snd $ fromJust $ unApp aexpr
- concExprs = map (\x -> fromJust $ Map.lookup x $ getConcMap env) args
- in startReplace 1 cexpr concExprs
- where
- startReplace i cex [] = return $ Just cex
- startReplace i cex (a:as) = do val <- debugReplaceConc cex i a
- maybe ( --do putStrLn $ "didn't find "++ showExpr [] a ++ " in " ++showExpr [] cexpr
- return Nothing)
- (\x -> --do putStrLn $ "found it, the current expression is "++ showExpr [] x
- startReplace (i+1) x as)
- val
-
-debugReplaceConc :: Expr -> Int -> Expr -> IO (Maybe Expr)
-debugReplaceConc expr i e =
- let (newe,isThere) = searchArg expr
- in if isThere then return $ Just newe else return $ Nothing
- where
- searchArg e_ =
- if isGeneralizationOf e e_ then (mkMeta i, True)
- else maybe (e_,False) (\(cid,args) -> let repargs = map searchArg args
- in (mkApp cid (map fst repargs), or $ map snd repargs)) $ unApp e_
-
-
-{-
--- replaceArgs : Original expression to parse (from abstract syntax) -> Concrete expression (parsed)
-replaceArgs :: Expr -> Expr -> Environ -> Maybe Expr
-replaceArgs aexpr cexpr env =
- if isNothing $ unApp aexpr then error $ "could't unwrap this "++ show aexpr
- else if any isNothing $ map unApp $ snd $ fromJust $ unApp aexpr then error $ "couldn't unwrap more this : "++ show aexpr
- else
- let args = map (fst.fromJust.unApp) $ snd $ fromJust $ unApp aexpr
- concExprs = map (\x -> fromJust $ Map.lookup x $ getConcMap env) args
- in startReplace 1 cexpr concExprs
- where
- startReplace i cex [] = return cex
- startReplace i cex (a:as) = maybe Nothing (\x -> startReplace (i+1) x as) $ replaceConc cex i a
-
-
-
-replaceConc :: Expr -> Int -> Expr -> Maybe Expr
-replaceConc expr i e =
- let (newe,isThere) = searchArg expr
- in if isThere then return newe else Nothing
- where
- searchArg e_ =
- if isGeneralizationOf e e_ then (mkMeta i, True)
- else maybe (e_,False) (\(cid,args) -> let repargs = map searchArg args
- in (mkApp cid (map fst repargs), or $ map snd repargs)) $ unApp e_
-
-
-
-writeResults :: Environ -> String -> IO ()
-writeResults env fileName =
- let cmap = getConcMap env
- lincats = unlines $ map (\(x,y) -> "lincat " ++ showCId x ++ " = " ++ showCId y ++ " ; " ) $ Map.toList $ getTypeMap env
- sigs = unlines $ map
- (\x -> let n = getName x
- no = length $ getTypeArgs x
- oargs = unwords $ ("lin " ++ showCId n) : ["o"++show i | i <- [1..no]]
- in (oargs ++ " = " ++ (simpleReplace $ showExpr [] $ fromJust $ Map.lookup n cmap) ++ " ; ")) $ concat $ Map.elems $ getSigs env
- in
- writeFile fileName ("\n" ++ lincats ++ "\n\n" ++ sigs)
-
-
-simpleReplace :: String -> String
-simpleReplace [] = []
-simpleReplace ('?':xs) = 'o' : simpleReplace xs
-simpleReplace (x:xs) = x : simpleReplace xs
--}
-
-isMeta :: Expr -> Bool
-isMeta = isJust.unMeta
-
--- works with utf-8 characters also, as it seems
-
-
-mkFuncWithArg :: ((CId,CId),[CId]) -> FuncWithArg
-mkFuncWithArg ((c1,c2),cids) = FuncWithArg c1 c2 cids
-
-
----------------------------------------------------------------------------------
-
-initial :: TypeMap -> ConcMap -> [FuncWithArg] -> [FuncWithArg] -> Environ
-initial tm cm fss allfs = Env tm cm (mkSigs fss) allfs
-{-
-testInit :: [FuncWithArg] -> Environ
-testInit allfs = initial lTypes Map.empty [] allfs
-
-lTypes = Map.fromList [(mkCId "Comment", mkCId "S"),(mkCId "Item", mkCId "NP"), (mkCId "Kind", mkCId "CN"), (mkCId "Quality", mkCId "AP")]
--}
-startCateg = mkCId "Comment"
--- question about either to give the startcat or not ...
-
-
-
-
-
-----------------------------------------------------------------------------------------------------------
-{-
-main =
- do args <- getArgs
- case args of
- [pgfFile] ->
- do pgf <- readPGF pgfFile
- parsePGF <- readPGF parsePGFfile
- fsWithArg <- forExample pgf
- let funcsWithArg = map (map mkFuncWithArg) fsWithArg
- let morpho = buildMorpho parsePGF parseLang
- let fss = concat funcsWithArg
- let fileName = takeWhile (/='.') pgfFile ++ lang ++ ".gf"
- env <- start parsePGF pgf morpho (testInit fss) fss
- putStrLn $ "Should I write the results to a file ? yes/no"
- ans <-getLine
- if ans == "yes" then do writeResults env fileName
- putStrLn $ "Wrote file " ++ fileName
- else return ()
- _ -> fail "usage : Testing "
-
-
-
-start :: PGF -> PGF -> Morpho -> Environ -> [FuncWithArg] -> IO Environ
-start parsePGF pgfFile morpho env lst =
- do putStrLn "Do you want examples from another language ? (no/concrete syntax name otherwise)"
- ans1 <- getLine
- putStrLn "Do you want testing mode ? (yes/no)"
- ans2 <- getLine
- case (ans1,ans2) of
- ("no","no") -> do putStrLn "no extra language, just the abstract syntax tree"
- interact env lst False Nothing
- (_,"no") -> interact env lst False (readLanguage ans1)
- ("no","yes") -> do putStrLn "no extra language, just the abstract syntax tree"
- interact env lst True Nothing
- (_,"yes") -> interact env lst True (readLanguage ans1)
- ("no",_) -> do putStrLn "no extra language, just the abstract syntax tree"
- putStrLn $ "I assume you don't want the testing mode ... "
- interact env lst False Nothing
- (_,_) -> do putStrLn $ "I assume you don't want the testing mode ... "
- interact env lst False (readLanguage ans1)
- where
-
- interact environ [] func _ = return environ
- interact environ (farg:fargs) boo otherLang =
- do
- maybeEnv <- basicInter farg otherLang environ boo
- if isNothing maybeEnv then return environ
- else interact (fromJust maybeEnv) fargs boo otherLang
-
- basicInter farg js environ False =
- let e_ = getExpr farg environ in
- if isNothing e_ then return $ Just environ
- else parseAndBuild farg js environ (getType farg) e_ Nothing
- basicInter farg js environ True =
- let (e_,e_test) = get2Expr farg environ in
- if isNothing e_ then return $ Just environ
- else if isNothing e_test then do putStrLn $ "not enough arguments "++ (showCId $ getName farg)
- parseAndBuild farg js environ (getType farg) e_ Nothing
- else parseAndBuild farg js environ (getType farg) e_ e_test
-
--- . head . generateRandomFrom gen2 pgfFile
- parseAndBuild farg js environ ty e_ e_test =
- do let expr = fromJust e_
- gen1 <- newStdGen
- gen2 <- newStdGen
- let newexpr = head $ generateRandomFrom gen1 pgfFile expr
- let embeddedExpr = maybe "***" (showExpr [] ) (embedInStart (getAll environ) (Map.fromList [(ty,expr)]))
- let lexpr = if isNothing js then "" else "\n-- " ++ linearize pgfFile (fromJust js) newexpr ++ " --"
- putStrLn $ "Give an example for " ++ (showExpr [] expr)
- ++ lexpr ++ "and now"
- ++ "\n\nas in " ++ embeddedExpr ++ "\n\n"
- --
- ex <- getLine
- if (ex == ":q") then return Nothing
- else
- let ctype = fromJust $ Map.lookup (getType farg) (getTypeMap environ) in
- do env' <- decypher farg ex expr environ (fromJust $ readType $ showCId ctype) e_test
- return (Just env')
-
- decypher farg ex expr environ ty e_test =
- --do putStrLn $ "We need to parse " ++ ex ++ " as " ++ show ctype
- let pTrees = parse parsePGF (fromJust $ readLanguage "ParseEng") ty ex in
- pickTree farg expr environ ex e_test pTrees
-
- -- putStrLn $ "And now for testing, \n is this also correct yes/no \n ## " ++ (linearize parsePGF parseLang $ mapToResource newenv $ llin newenv e_test) ++ " ##"
-
- -- select the right tree among the options given by the parser
- pickTree farg expr environ ex e_test [] =
- let miswords = morphoMissing morpho (words ex)
- in
- if null miswords then do putStrLn $ "all words known, but some syntactic construction is not covered by the grammar..."
- return environ
- else do putStrLn $ "the following words are unknown, please add them to the lexicon: " ++ show miswords
- return environ
- pickTree farg expr environ ex e_test [tree] =
- do val <- searchGoodTree environ expr [tree] -- maybe order here after the probabilities for better precision
- maybe (do putStrLn $ "none of the trees is consistent with the rest of the grammar, please check arguments "
- return environ)
- (\(x,newtree) -> let newenv = updateEnv environ farg (getType farg) x in
- do putStrLn $ "the result is "++showExpr [] x
- newtestenv <- testTest newenv e_test -- question ? should it belong there - there is just one possibility of a tree...
- return newenv) val
- pickTree farg expr environ ex e_test parseTrees =
- do putStrLn $ "There is more than one possibility, do you want to choose the right tree yourself ? yes/no "
- putStr " >"
- ans <- getLine
- if ans == "yes" then do pTree <- chooseRightTree parseTrees
- processTree farg environ expr pTree e_test
- else processTree farg environ expr parseTrees e_test
-
- -- introduce testing function, if it doesn't work, then reparse, take that tree
- testTree envv e_test = return envv -- TO DO - add testing here
-
- testTest envv Nothing = return envv
- testTest envv (Just exxpr) = testTree envv exxpr
-
-
- -- allows the user to pick his own tree
- chooseRightTree trees = return trees -- TO DO - add something clever here
-
- -- selects the tree from where one can abstract over the original arguments
- processTree farg environ expr lsTrees e_test =
- let trmes = if length lsTrees == 1 then "the tree is not consistent " else "none of the trees is consistent " in
- do val <- searchGoodTree environ expr lsTrees
- maybe (do putStrLn $ trmes ++ "with the rest of the grammar, please check arguments! "
- return environ)
- (\(x,newtree) -> let newenv = updateEnv environ farg (getType farg) x in
- do putStrLn $ "the result is "++showExpr [] x
- newtestenv <- testTest newenv e_test
- return newenv) val
-
-
-
--------------------------------
-
-get2Expr :: FuncWithArg -> Environ -> (Maybe Expr, Maybe Expr)
-get2Expr farg env =
- let tys = getTypeArgs farg
- ctx = getSigs env
- (lst1,lst2) = getConcTypes2 ctx tys 1
- arg1 = if (all isJust lst1) then Just $ mkApp (getName farg) (map fromJust lst1) else Nothing
- arg2 = if (all isJust lst2) then Just $ mkApp (getName farg) (map fromJust lst2) else Nothing
- in if arg1 == arg2 then (arg1, Nothing)
- else (arg1,arg2)
- where
- getConcTypes2 context [] i = ([],[])
- getConcTypes2 context (ty:types) i =
- let pos = Map.lookup ty context
- in
- if isNothing pos || (null $ fromJust pos) then ([Nothing],[Nothing])
- else
- let (mm,tt) = (last $ fromJust pos, head $ fromJust pos)
- mmargs = getTypeArgs mm
- newi = i + length mmargs - 1
- (lst1,lst2) = getConcTypes2 (Map.insert ty (init (fromJust pos)) context) types (newi+1)
- ttargs = getTypeArgs tt
- newtti = i + length ttargs - 1
- fstArg = if (all isJust lst1) then -- i..newi
- (Just $ mkApp (getName mm) [mkMeta j | j <- [1..(length mmargs)]]) : lst1
- else [Nothing]
- sndArg = if (all isJust lst2) then
- (Just $ mkApp (getName tt) [mkMeta j | j <- [1..(length ttargs)]]) : lst2
- else [Nothing]
- in
- (fstArg,sndArg)
-
-
--}
-
\ No newline at end of file
diff --git a/src/example-based/ExampleService.hs b/src/example-based/ExampleService.hs
deleted file mode 100644
index e6312bf96..000000000
--- a/src/example-based/ExampleService.hs
+++ /dev/null
@@ -1,128 +0,0 @@
-module ExampleService(cgiMain,cgiMain',newPGFCache) where
-import System.Random(newStdGen)
-import System.FilePath((>),makeRelative)
-import Data.Map(fromList)
-import Data.Char(isDigit)
-import Data.Maybe(fromJust)
-import qualified Codec.Binary.UTF8.String as UTF8 (decodeString)
-import PGF
-import GF.Compile.ToAPI
-import Network.CGI
-import Text.JSON
-import CGIUtils
-import Cache
-import qualified ExampleDemo as E
-
-newPGFCache = newCache readPGF
-
-
-cgiMain :: Cache PGF -> CGI CGIResult
-cgiMain = handleErrors . handleCGIErrors . cgiMain' "." "."
-
-cgiMain' root cwd cache =
- do command <- getInp "command"
- environ <- parseEnviron =<< getInp "state"
- case command of
- "possibilities" -> doPossibilities environ
- "provide_example" -> doProvideExample root cwd cache environ
- "abstract_example" -> doAbstractExample cwd cache environ
- "test_function" -> doTestFunction cwd cache environ
- _ -> throwCGIError 400 ("Unknown command: "++command) []
-
-doPossibilities environ =
- do example_environ <- parseEnviron =<< getInp "example_state"
- outputJSONP (E.getNext environ example_environ)
-
-doProvideExample root cwd cache environ =
- do Just lang <- readInput "lang"
- fun <- getCId "fun"
- parsePGF <- readParsePGF cwd cache
- let adjpath path = root>makeRelative "/" (makeRelative root cwd>path)
- pgf <- liftIO . readCache cache . adjpath =<< getInp "grammar"
- gen <- liftIO newStdGen
- let Just (e,s) = E.provideExample gen environ fun parsePGF pgf lang
- res = (showExpr [] e,s)
- liftIO $ logError $ "proveExample ... = "++show res
- outputJSONP res
-
-doAbstractExample cwd cache environ =
- do example <- getInp "input"
- Just params <- readInput "params"
- absstr <- getInp "abstract"
- Just abs <- return $ readExpr absstr
- liftIO $ logError $ "abstract = "++showExpr [] abs
- Just cat <- readInput "cat"
- let t = mkType [] cat []
- parsePGF <- readParsePGF cwd cache
- let lang:_ = languages parsePGF
- ae <- liftIO $ abstractExample parsePGF environ lang t abs example
- outputJSONP (fmap (\(e,_)->(exprToAPI (instExpMeta params e),e)) ae)
-
-abstractExample parsePGF env lang cat abs example =
- E.searchGoodTree env abs (parse parsePGF lang cat example)
-
-doTestFunction cwd cache environ =
- do fun <- getCId "fun"
- parsePGF <- readParsePGF cwd cache
- let lang:_ = languages parsePGF
- Just txt <- return (E.testThis environ fun parsePGF lang)
- outputJSONP txt
-
-getCId :: String -> CGI CId
-getCId name = maybe err return =<< fmap readCId (getInp name)
- where err = throwCGIError 400 ("Bad "++name) []
-{-
-getLimit :: CGI Int
-getLimit = maybe err return =<< readInput "limit"
- where err = throwCGIError 400 "Missing/bad limit" []
--}
-
-readParsePGF cwd cache =
- do parsepgf <- getInp "parser"
- liftIO $ readCache cache (cwd>parsepgf)
-
-parseEnviron s = do state <- liftIO $ readIO s
- return $ environ state
-
-getInp name = maybe err (return . UTF8.decodeString) =<< getInput name
- where err = throwCGIError 400 ("Missing parameter: "++name) []
-
-
-instance JSON CId where
- showJSON = showJSON . show
- readJSON = (readResult =<<) . readJSON
-
-instance JSON Expr where
- showJSON = showJSON . showExpr []
- readJSON = (m2r . readExpr =<<) . readJSON
-
-m2r = maybe (Error "read failed") Ok
-
-readResult s = case reads s of
- (x,r):_ | lex r==[("","")] -> Ok x
- _ -> Error "read failed"
-
---------------------------------------------------------------------------------
--- cat lincat fun lin fun cat cat
-environ :: ([(CId, CId)],[(CId, Expr)],[((CId, CId), [CId])]) -> E.Environ
-environ (lincats,lins0,funs) =
- E.initial (fromList lincats) concmap fs allfs
- where
- concmap = fromList lins
- allfs = map E.mkFuncWithArg funs
- fs = [E.mkFuncWithArg f | f@((fn,_),_)<-funs, fn `elem` cns]
- cns = map fst lins
- lins = filter (not . E.isMeta .snd) lins0
-
-
-instExpMeta :: [CId] -> Expr -> Expr
-instExpMeta ps = fromJust . readExpr . instMeta ps . showExpr []
-
-instMeta :: [CId] -> String -> String
-instMeta ps s =
- case break (=='?') s of
- (s1,'?':s2) ->
- case span isDigit s2 of
- (s21@(_:_),s22) -> s1++show (ps!!(read s21-1))++instMeta ps s22
- ("",s22) -> s1++'?':instMeta ps s22
- (_,_) -> s
diff --git a/src/example-based/exb-fcgi.hs b/src/example-based/exb-fcgi.hs
deleted file mode 100644
index 54f1872d0..000000000
--- a/src/example-based/exb-fcgi.hs
+++ /dev/null
@@ -1,15 +0,0 @@
-{-# LANGUAGE CPP #-}
-import Control.Concurrent(forkIO)
-import Network.FastCGI(runFastCGI,runFastCGIConcurrent')
-import ExampleService(cgiMain,newPGFCache)
-
-main = do --stderrToFile logFile
- fcgiMain =<< newPGFCache
-
-
-fcgiMain cache =
-#ifndef mingw32_HOST_OS
- runFastCGIConcurrent' forkIO 100 (cgiMain cache)
-#else
- runFastCGI (cgiMain cache)
-#endif
diff --git a/src/example-based/gf-exb.cabal b/src/example-based/gf-exb.cabal
deleted file mode 100644
index 1366e75da..000000000
--- a/src/example-based/gf-exb.cabal
+++ /dev/null
@@ -1,25 +0,0 @@
-Name: gf-exb
-Version: 1.0
-Cabal-version: >= 1.8
-Build-type: Simple
-License: GPL
-Synopsis: Example-based grammar writing for the Grammatical Framework
-
-executable exb.fcgi
- main-is: exb-fcgi.hs
- Hs-source-dirs: . ../server ../compiler ../runtime/haskell
- other-modules: ExampleService ExampleDemo
- FastCGIUtils Cache GF.Compile.ToAPI
- -- and a lot more...
- ghc-options: -threaded
- if impl(ghc>=7.0)
- ghc-options: -rtsopts
-
- build-depends: base >=4.2 && <5, json, cgi, fastcgi, random,
- containers, old-time, directory, bytestring, utf8-string,
- pretty, array, mtl, fst, filepath
-
- if os(windows)
- ghc-options: -optl-mwindows
- else
- build-depends: unix
diff --git a/src/example-based/todo.txt b/src/example-based/todo.txt
deleted file mode 100644
index 196dbc097..000000000
--- a/src/example-based/todo.txt
+++ /dev/null
@@ -1,20 +0,0 @@
-
-Editor improvements for example-based grammar writing:
-+ Remove the same language from the example language menu
-+ Send the other language environment to getNext
-- Compile a new .pgf automatically when needed
-- Update buttons automatically when functions are added or removed
-- Switch over to using AbsParadigmsEng.pgf instead of the old exprToAPI function
-
-Editor support for guided construction of linearization functions
-- enter api expressions by parsing them with AbsParadigmsEng.pgf in minibar
-- replace simpleParseInput with one that accepts quoted string literals
-- use lexcode/unlexcode in minibar
-- better support for literals in minibar (completion info from the PGF
- library should indicate if literals are acceptable)
-
-Server support for example-based grammar writing:
-- Change getNext to use info from the example language
-- Random generator restricted to defined functions
-
-- More testing
diff --git a/src/pgf-binary/PGF/Data/Binary.hs b/src/pgf-binary/PGF/Data/Binary.hs
deleted file mode 100644
index 7c10419b5..000000000
--- a/src/pgf-binary/PGF/Data/Binary.hs
+++ /dev/null
@@ -1,489 +0,0 @@
-{-# LANGUAGE FlexibleInstances, FlexibleContexts #-}
--- | This is a layer on top of "Data.Binary" with its own 'Binary' class
--- and customised instances for 'Word', 'Int' and 'Double'.
--- The 'Int' and 'Word' instance use a variable-length encoding to save space
--- for small numbers. The 'Double' instance uses the standard IEEE754 encoding.
-module PGF.Data.Binary (
-
- -- * The Binary class
- Binary(..)
-
- -- * The Get and Put monads
- , Get , Put, runPut
-
- -- * Useful helpers for writing instances
- , putWord8 , getWord8 , putWord16be , getWord16be
-
- -- * Binary serialisation
- , encode , decode
-
- -- * IO functions for serialisation
- , encodeFile , decodeFile
-
- , encodeFile_ , decodeFile_
-
- -- * Useful
- , Word8, Word16
-
- ) where
-
-
-import Data.Word
-
-import qualified Data.Binary as Bin
-import Data.Binary.Put
-import Data.Binary.Get
-import Data.Binary.IEEE754 ( putFloat64be, getFloat64be)
-import Control.Monad
-import Control.Exception
-import Foreign
-import System.IO
-
-import Data.ByteString.Lazy (ByteString)
-import qualified Data.ByteString.Lazy as L
-
---import Data.Char (chr,ord)
---import Data.List (unfoldr)
-
--- And needed for the instances:
-import qualified Data.ByteString as B
-import qualified Data.Map as Map
-import qualified Data.Set as Set
-import qualified Data.IntMap as IntMap
-import qualified Data.IntSet as IntSet
---import qualified Data.Ratio as R
-
---import qualified Data.Tree as T
-
-import Data.Array.Unboxed
-
-------------------------------------------------------------------------
-
--- | The @Binary@ class provides 'put' and 'get', methods to encode and
--- decode a Haskell value to a lazy ByteString. It mirrors the Read and
--- Show classes for textual representation of Haskell types, and is
--- suitable for serialising Haskell values to disk, over the network.
---
--- For parsing and generating simple external binary formats (e.g. C
--- structures), Binary may be used, but in general is not suitable
--- for complex protocols. Instead use the Put and Get primitives
--- directly.
---
--- Instances of Binary should satisfy the following property:
---
--- > decode . encode == id
---
--- That is, the 'get' and 'put' methods should be the inverse of each
--- other. A range of instances are provided for basic Haskell types.
---
-class Binary t where
- -- | Encode a value in the Put monad.
- put :: t -> Put
- -- | Decode a value in the Get monad
- get :: Get t
-
-------------------------------------------------------------------------
--- Wrappers to run the underlying monad
-
--- | Encode a value using binary serialisation to a lazy ByteString.
---
-encode :: Binary a => a -> ByteString
-encode = runPut . put
-{-# INLINE encode #-}
-
--- | Decode a value from a lazy ByteString, reconstructing the original structure.
---
-decode :: Binary a => ByteString -> a
-decode = runGet get
-
-------------------------------------------------------------------------
--- Convenience IO operations
-
--- | Lazily serialise a value to a file
---
--- This is just a convenience function, it's defined simply as:
---
--- > encodeFile f = B.writeFile f . encode
---
--- So for example if you wanted to compress as well, you could use:
---
--- > B.writeFile f . compress . encode
---
-encodeFile :: Binary a => FilePath -> a -> IO ()
-encodeFile f v = L.writeFile f (encode v)
-
-encodeFile_ :: FilePath -> Put -> IO ()
-encodeFile_ f m = L.writeFile f (runPut m)
-
--- | Lazily reconstruct a value previously written to a file.
---
--- This is just a convenience function, it's defined simply as:
---
--- > decodeFile f = return . decode =<< B.readFile f
---
--- So for example if you wanted to decompress as well, you could use:
---
--- > return . decode . decompress =<< B.readFile f
---
-decodeFile :: Binary a => FilePath -> IO a
-decodeFile f = bracket (openBinaryFile f ReadMode) hClose $ \h -> do
- s <- L.hGetContents h
- evaluate $ runGet get s
-
-decodeFile_ :: FilePath -> Get a -> IO a
-decodeFile_ f m = bracket (openBinaryFile f ReadMode) hClose $ \h -> do
- s <- L.hGetContents h
- evaluate $ runGet m s
-
-------------------------------------------------------------------------
--- For ground types, the standard instances can be reused,
--- but for container types it would imply using
--- the standard instances for all types of values in the container...
-
-instance Binary () where put=Bin.put; get=Bin.get
-instance Binary Bool where put=Bin.put; get=Bin.get
-instance Binary Word8 where put=Bin.put; get=Bin.get
-instance Binary Word16 where put=Bin.put; get=Bin.get
-instance Binary Char where put=Bin.put; get=Bin.get
-
--- -- GF doesn't need these:
---instance Binary Ordering where put=Bin.put; get=Bin.get
---instance Binary Word32 where put=Bin.put; get=Bin.get
---instance Binary Word64 where put=Bin.put; get=Bin.get
---instance Binary Int8 where put=Bin.put; get=Bin.get
---instance Binary Int16 where put=Bin.put; get=Bin.get
---instance Binary Int32 where put=Bin.put; get=Bin.get
-
---instance Binary Int64 where put=Bin.put; get=Bin.get -- needed by instance Binary ByteString
-
-------------------------------------------------------------------------
-
--- Words are written as sequence of bytes. The last bit of each
--- byte indicates whether there are more bytes to be read
-instance Binary Word where
- put i | i <= 0x7f = do put a
- | i <= 0x3fff = do put (a .|. 0x80)
- put b
- | i <= 0x1fffff = do put (a .|. 0x80)
- put (b .|. 0x80)
- put c
- | i <= 0xfffffff = do put (a .|. 0x80)
- put (b .|. 0x80)
- put (c .|. 0x80)
- put d
--- -- #if WORD_SIZE_IN_BITS < 64
- | otherwise = do put (a .|. 0x80)
- put (b .|. 0x80)
- put (c .|. 0x80)
- put (d .|. 0x80)
- put e
-{-
--- Restricted to 32 bits even on 64-bit systems, so that negative
--- Ints are written as 5 bytes instead of 10 bytes (TH 2013-02-13)
---#else
- | i <= 0x7ffffffff = do put (a .|. 0x80)
- put (b .|. 0x80)
- put (c .|. 0x80)
- put (d .|. 0x80)
- put e
- | i <= 0x3ffffffffff = do put (a .|. 0x80)
- put (b .|. 0x80)
- put (c .|. 0x80)
- put (d .|. 0x80)
- put (e .|. 0x80)
- put f
- | i <= 0x1ffffffffffff = do put (a .|. 0x80)
- put (b .|. 0x80)
- put (c .|. 0x80)
- put (d .|. 0x80)
- put (e .|. 0x80)
- put (f .|. 0x80)
- put g
- | i <= 0xffffffffffffff = do put (a .|. 0x80)
- put (b .|. 0x80)
- put (c .|. 0x80)
- put (d .|. 0x80)
- put (e .|. 0x80)
- put (f .|. 0x80)
- put (g .|. 0x80)
- put h
- | i <= 0xffffffffffffff = do put (a .|. 0x80)
- put (b .|. 0x80)
- put (c .|. 0x80)
- put (d .|. 0x80)
- put (e .|. 0x80)
- put (f .|. 0x80)
- put (g .|. 0x80)
- put h
- | i <= 0x7fffffffffffffff = do put (a .|. 0x80)
- put (b .|. 0x80)
- put (c .|. 0x80)
- put (d .|. 0x80)
- put (e .|. 0x80)
- put (f .|. 0x80)
- put (g .|. 0x80)
- put (h .|. 0x80)
- put j
- | otherwise = do put (a .|. 0x80)
- put (b .|. 0x80)
- put (c .|. 0x80)
- put (d .|. 0x80)
- put (e .|. 0x80)
- put (f .|. 0x80)
- put (g .|. 0x80)
- put (h .|. 0x80)
- put (j .|. 0x80)
- put k
--- #endif
--}
- where
- a = fromIntegral ( i .&. 0x7f) :: Word8
- b = fromIntegral (shiftR i 7 .&. 0x7f) :: Word8
- c = fromIntegral (shiftR i 14 .&. 0x7f) :: Word8
- d = fromIntegral (shiftR i 21 .&. 0x7f) :: Word8
- e = fromIntegral (shiftR i 28 .&. 0x7f) :: Word8
-{-
- f = fromIntegral (shiftR i 35 .&. 0x7f) :: Word8
- g = fromIntegral (shiftR i 42 .&. 0x7f) :: Word8
- h = fromIntegral (shiftR i 49 .&. 0x7f) :: Word8
- j = fromIntegral (shiftR i 56 .&. 0x7f) :: Word8
- k = fromIntegral (shiftR i 63 .&. 0x7f) :: Word8
--}
- get = do i <- getWord8
- (if i <= 0x7f
- then return (fromIntegral i)
- else do n <- get
- return $ (n `shiftL` 7) .|. (fromIntegral (i .&. 0x7f)))
-
--- Int has the same representation as Word
-instance Binary Int where
- put i = put (fromIntegral i :: Word)
- get = liftM toInt32 (get :: Get Word)
- where
- -- restrict to 32 bits (for PGF portability, TH 2013-02-13)
- toInt32 w = fromIntegral (fromIntegral w::Int32)::Int
-
-------------------------------------------------------------------------
---
--- Portable, and pretty efficient, serialisation of Integer
---
-
--- Fixed-size type for a subset of Integer
---type SmallInt = Int32
-
--- Integers are encoded in two ways: if they fit inside a SmallInt,
--- they're written as a byte tag, and that value. If the Integer value
--- is too large to fit in a SmallInt, it is written as a byte array,
--- along with a sign and length field.
-{-
-instance Binary Integer where
-
- {-# INLINE put #-}
- put n | n >= lo && n <= hi = do
- putWord8 0
- put (fromIntegral n :: SmallInt) -- fast path
- where
- lo = fromIntegral (minBound :: SmallInt) :: Integer
- hi = fromIntegral (maxBound :: SmallInt) :: Integer
-
- put n = do
- putWord8 1
- put sign
- put (unroll (abs n)) -- unroll the bytes
- where
- sign = fromIntegral (signum n) :: Word8
-
- {-# INLINE get #-}
- get = do
- tag <- get :: Get Word8
- case tag of
- 0 -> liftM fromIntegral (get :: Get SmallInt)
- _ -> do sign <- get
- bytes <- get
- let v = roll bytes
- return $! if sign == (1 :: Word8) then v else - v
-
---
--- Fold and unfold an Integer to and from a list of its bytes
---
-unroll :: Integer -> [Word8]
-unroll = unfoldr step
- where
- step 0 = Nothing
- step i = Just (fromIntegral i, i `shiftR` 8)
-
-roll :: [Word8] -> Integer
-roll = foldr unstep 0
- where
- unstep b a = a `shiftL` 8 .|. fromIntegral b
-
-instance (Binary a,Integral a) => Binary (R.Ratio a) where
- put r = put (R.numerator r) >> put (R.denominator r)
- get = liftM2 (R.%) get get
--}
-
-------------------------------------------------------------------------
--- Instances for the first few tuples
-
-instance (Binary a, Binary b) => Binary (a,b) where
- put (a,b) = put a >> put b
- get = liftM2 (,) get get
-
-instance (Binary a, Binary b, Binary c) => Binary (a,b,c) where
- put (a,b,c) = put a >> put b >> put c
- get = liftM3 (,,) get get get
-
-instance (Binary a, Binary b, Binary c, Binary d) => Binary (a,b,c,d) where
- put (a,b,c,d) = put a >> put b >> put c >> put d
- get = liftM4 (,,,) get get get get
-
-instance (Binary a, Binary b, Binary c, Binary d, Binary e) => Binary (a,b,c,d,e) where
- put (a,b,c,d,e) = put a >> put b >> put c >> put d >> put e
- get = liftM5 (,,,,) get get get get get
-
---
--- and now just recurse:
---
-
-instance (Binary a, Binary b, Binary c, Binary d, Binary e, Binary f)
- => Binary (a,b,c,d,e,f) where
- put (a,b,c,d,e,f) = put (a,(b,c,d,e,f))
- get = do (a,(b,c,d,e,f)) <- get ; return (a,b,c,d,e,f)
-
-instance (Binary a, Binary b, Binary c, Binary d, Binary e, Binary f, Binary g)
- => Binary (a,b,c,d,e,f,g) where
- put (a,b,c,d,e,f,g) = put (a,(b,c,d,e,f,g))
- get = do (a,(b,c,d,e,f,g)) <- get ; return (a,b,c,d,e,f,g)
-
-instance (Binary a, Binary b, Binary c, Binary d, Binary e,
- Binary f, Binary g, Binary h)
- => Binary (a,b,c,d,e,f,g,h) where
- put (a,b,c,d,e,f,g,h) = put (a,(b,c,d,e,f,g,h))
- get = do (a,(b,c,d,e,f,g,h)) <- get ; return (a,b,c,d,e,f,g,h)
-
-instance (Binary a, Binary b, Binary c, Binary d, Binary e,
- Binary f, Binary g, Binary h, Binary i)
- => Binary (a,b,c,d,e,f,g,h,i) where
- put (a,b,c,d,e,f,g,h,i) = put (a,(b,c,d,e,f,g,h,i))
- get = do (a,(b,c,d,e,f,g,h,i)) <- get ; return (a,b,c,d,e,f,g,h,i)
-
-instance (Binary a, Binary b, Binary c, Binary d, Binary e,
- Binary f, Binary g, Binary h, Binary i, Binary j)
- => Binary (a,b,c,d,e,f,g,h,i,j) where
- put (a,b,c,d,e,f,g,h,i,j) = put (a,(b,c,d,e,f,g,h,i,j))
- get = do (a,(b,c,d,e,f,g,h,i,j)) <- get ; return (a,b,c,d,e,f,g,h,i,j)
-
-------------------------------------------------------------------------
--- Container types
-
-instance Binary a => Binary [a] where
- put l = put (length l) >> mapM_ put l
- get = do n <- get :: Get Int
- xs <- replicateM n get
- return xs
-
-instance (Binary a) => Binary (Maybe a) where
- put Nothing = putWord8 0
- put (Just x) = putWord8 1 >> put x
- get = do
- w <- getWord8
- case w of
- 0 -> return Nothing
- _ -> liftM Just get
-
-instance (Binary a, Binary b) => Binary (Either a b) where
- put (Left a) = putWord8 0 >> put a
- put (Right b) = putWord8 1 >> put b
- get = do
- w <- getWord8
- case w of
- 0 -> liftM Left get
- _ -> liftM Right get
-
-------------------------------------------------------------------------
--- ByteStrings (have specially efficient instances)
-
-instance Binary B.ByteString where
- put bs = do put (B.length bs)
- putByteString bs
- get = get >>= getByteString
-
---
--- Using old versions of fps, this is a type synonym, and non portable
---
--- Requires 'flexible instances'
---
-{-
-instance Binary ByteString where
- put bs = do put (fromIntegral (L.length bs) :: Int)
- putLazyByteString bs
- get = get >>= getLazyByteString
--}
-------------------------------------------------------------------------
--- Maps and Sets
-
-instance (Ord a, Binary a) => Binary (Set.Set a) where
- put s = put (Set.size s) >> mapM_ put (Set.toAscList s)
- get = liftM Set.fromDistinctAscList get
-
-instance (Ord k, Binary k, Binary e) => Binary (Map.Map k e) where
- put m = put (Map.size m) >> mapM_ put (Map.toAscList m)
- get = liftM Map.fromDistinctAscList get
-
-instance Binary IntSet.IntSet where
- put s = put (IntSet.size s) >> mapM_ put (IntSet.toAscList s)
- get = liftM IntSet.fromDistinctAscList get
-
-instance (Binary e) => Binary (IntMap.IntMap e) where
- put m = put (IntMap.size m) >> mapM_ put (IntMap.toAscList m)
- get = liftM IntMap.fromDistinctAscList get
-
-------------------------------------------------------------------------
--- Floating point
-
--- instance Binary Double where
--- put d = put (decodeFloat d)
--- get = liftM2 encodeFloat get get
-
-instance Binary Double where
- put = putFloat64be
- get = getFloat64be
-{-
-instance Binary Float where
- put f = put (decodeFloat f)
- get = liftM2 encodeFloat get get
--}
-------------------------------------------------------------------------
--- Trees
-{-
-instance (Binary e) => Binary (T.Tree e) where
- put (T.Node r s) = put r >> put s
- get = liftM2 T.Node get get
--}
-------------------------------------------------------------------------
--- Arrays
-
-instance (Binary i, Ix i, Binary e) => Binary (Array i e) where
- put a = do
- put (bounds a)
- put (rangeSize $ bounds a) -- write the length
- mapM_ put (elems a) -- now the elems.
- get = do
- bs <- get
- n <- get -- read the length
- xs <- replicateM n get -- now the elems.
- return (listArray bs xs)
-
---
--- The IArray UArray e constraint is non portable. Requires flexible instances
---
-instance (Binary i, Ix i, Binary e, IArray UArray e) => Binary (UArray i e) where
- put a = do
- put (bounds a)
- put (rangeSize $ bounds a) -- now write the length
- mapM_ put (elems a)
- get = do
- bs <- get
- n <- get
- xs <- replicateM n get
- return (listArray bs xs)
diff --git a/src/pgf-binary/pgf-binary.cabal b/src/pgf-binary/pgf-binary.cabal
deleted file mode 100644
index 3f9bea896..000000000
--- a/src/pgf-binary/pgf-binary.cabal
+++ /dev/null
@@ -1,27 +0,0 @@
-name: pgf-binary
-version: 0.5
-
-cabal-version: >= 1.10
-build-type: Simple
-license: BSD3
---license-file: LICENSE
-synopsis: Custom version of the binary-0.5 package for the PGF library
-homepage: http://www.grammaticalframework.org/
---bug-reports: http://code.google.com/p/grammatical-framework/issues/list
-maintainer: Thomas Hallgren
-stability: provisional
-category: Data, Parsing
-tested-with: GHC==7.4.2, GHC==7.8.3
-
-source-repository head
- type: darcs
- location: http://www.grammaticalframework.org/
-
-Library
- default-language: Haskell2010
- build-depends: base >= 4.3 && <5, binary, data-binary-ieee754,
- containers, array, bytestring
- exposed-modules: PGF.Data.Binary
-
- ghc-options: -fwarn-unused-imports -O2
- extensions: FlexibleInstances, FlexibleContexts
diff --git a/src/runtime/c/Makefile.am b/src/runtime/c/Makefile.am
index edc4f88b2..8e757ca27 100644
--- a/src/runtime/c/Makefile.am
+++ b/src/runtime/c/Makefile.am
@@ -68,6 +68,7 @@ libpgf_la_SOURCES = \
pgf/data.h \
pgf/expr.c \
pgf/expr.h \
+ pgf/scanner.c \
pgf/parser.c \
pgf/lookup.c \
pgf/jit.c \
diff --git a/src/runtime/c/gu/defs.h b/src/runtime/c/gu/defs.h
index 6b531979c..9f59d8656 100644
--- a/src/runtime/c/gu/defs.h
+++ b/src/runtime/c/gu/defs.h
@@ -64,6 +64,8 @@
#ifdef GU_ALIGNOF
# define gu_alignof GU_ALIGNOF
+#elif defined(_MSC_VER)
+# define gu_alignof __alignof
#else
# define gu_alignof(t_) \
((size_t)(offsetof(struct { char c_; t_ e_; }, e_)))
@@ -77,7 +79,7 @@
#define GU_COMMA ,
-#define GU_ARRAY_LEN(t,a) (sizeof((const t[])a) / sizeof(t))
+#define GU_ARRAY_LEN(a) (sizeof(a) / sizeof(a[0]))
#define GU_ID(...) __VA_ARGS__
@@ -183,9 +185,13 @@ typedef union {
void (*fp)();
} GuMaxAlign;
+#if defined(_MSC_VER)
+#include
+#define gu_alloca(N) alloca(N)
+#else
#define gu_alloca(N) \
(((union { GuMaxAlign align_; uint8_t buf_[N]; }){{0}}).buf_)
-
+#endif
// For Doxygen
#define GU_PRIVATE /** @private */
diff --git a/src/runtime/c/gu/map.c b/src/runtime/c/gu/map.c
index 9abebbe6e..dc19bc932 100644
--- a/src/runtime/c/gu/map.c
+++ b/src/runtime/c/gu/map.c
@@ -7,6 +7,9 @@
typedef struct GuMapData GuMapData;
+#define SKIP_DELETED 1
+#define SKIP_NONE 2
+
struct GuMapData {
uint8_t* keys;
uint8_t* values;
@@ -19,6 +22,7 @@ struct GuMap {
GuHasher* hasher;
size_t key_size;
size_t value_size;
+ size_t cell_size; // cell_size = GU_MAX(value_size,sizeof(uint8_t))
const void* default_value;
GuMapData data;
@@ -30,9 +34,7 @@ gu_map_finalize(GuFinalizer* fin)
{
GuMap* map = gu_container(fin, GuMap, fin);
gu_mem_buf_free(map->data.keys);
- if (map->value_size) {
- gu_mem_buf_free(map->data.values);
- }
+ gu_mem_buf_free(map->data.values);
}
static const GuWord gu_map_empty_key = 0;
@@ -68,7 +70,7 @@ gu_map_entry_is_free(GuMap* map, GuMapData* data, size_t idx)
}
static bool
-gu_map_lookup(GuMap* map, const void* key, size_t* idx_out)
+gu_map_lookup(GuMap* map, const void* key, uint8_t del, size_t* idx_out)
{
size_t n = map->data.n_entries;
if (map->hasher == gu_addr_hasher) {
@@ -78,13 +80,17 @@ gu_map_lookup(GuMap* map, const void* key, size_t* idx_out)
while (true) {
const void* entry_key =
((const void**)map->data.keys)[idx];
+
if (entry_key == NULL && map->data.zero_idx != idx) {
- *idx_out = idx;
- return false;
+ if (map->data.values[idx * map->cell_size] != del) { //skip deleted
+ *idx_out = idx;
+ return false;
+ }
} else if (entry_key == key) {
*idx_out = idx;
return true;
}
+
idx = (idx + offset) % n;
}
} else if (map->hasher == gu_word_hasher) {
@@ -156,33 +162,18 @@ gu_map_resize(GuMap* map, size_t req_entries)
size_t key_size = map->key_size;
size_t key_alloc = 0;
data->keys = gu_mem_buf_alloc(req_entries * key_size, &key_alloc);
+ memset(data->keys, 0, key_alloc);
- size_t value_size = map->value_size;
size_t value_alloc = 0;
- if (value_size) {
- data->values = gu_mem_buf_alloc(req_entries * value_size,
- &value_alloc);
- memset(data->values, 0, value_alloc);
- }
-
- data->n_entries = gu_twin_prime_inf(value_size ?
- GU_MIN(key_alloc / key_size,
- value_alloc / value_size)
- : key_alloc / key_size);
- if (map->hasher == gu_addr_hasher) {
- for (size_t i = 0; i < data->n_entries; i++) {
- ((const void**)data->keys)[i] = NULL;
- }
- } else if (map->hasher == gu_string_hasher) {
- for (size_t i = 0; i < data->n_entries; i++) {
- ((GuString*)data->keys)[i] = NULL;
- }
- } else {
- memset(data->keys, 0, key_alloc);
- }
+ size_t cell_size = map->cell_size;
+ data->values = gu_mem_buf_alloc(req_entries * cell_size, &value_alloc);
+ memset(data->values, 0, value_alloc);
+ data->n_entries = gu_twin_prime_inf(
+ GU_MIN(key_alloc / key_size,
+ value_alloc / cell_size));
gu_assert(data->n_entries > data->n_occupied);
-
+
data->n_occupied = 0;
data->zero_idx = SIZE_MAX;
@@ -196,16 +187,14 @@ gu_map_resize(GuMap* map, size_t req_entries)
} else if (map->hasher == gu_string_hasher) {
old_key = (void*) *(GuString*)old_key;
}
- void* old_value = &old_data.values[i * value_size];
+ void* old_value = &old_data.values[i * cell_size];
memcpy(gu_map_insert(map, old_key),
old_value, map->value_size);
}
gu_mem_buf_free(old_data.keys);
- if (value_size) {
- gu_mem_buf_free(old_data.values);
- }
+ gu_mem_buf_free(old_data.values);
}
@@ -226,9 +215,9 @@ GU_API void*
gu_map_find(GuMap* map, const void* key)
{
size_t idx;
- bool found = gu_map_lookup(map, key, &idx);
+ bool found = gu_map_lookup(map, key, SKIP_DELETED, &idx);
if (found) {
- return &map->data.values[idx * map->value_size];
+ return &map->data.values[idx * map->cell_size];
}
return NULL;
}
@@ -244,7 +233,7 @@ GU_API const void*
gu_map_find_key(GuMap* map, const void* key)
{
size_t idx;
- bool found = gu_map_lookup(map, key, &idx);
+ bool found = gu_map_lookup(map, key, SKIP_DELETED, &idx);
if (found) {
return &map->data.keys[idx * map->key_size];
}
@@ -255,17 +244,17 @@ GU_API bool
gu_map_has(GuMap* ht, const void* key)
{
size_t idx;
- return gu_map_lookup(ht, key, &idx);
+ return gu_map_lookup(ht, key, SKIP_DELETED, &idx);
}
GU_API void*
gu_map_insert(GuMap* map, const void* key)
{
size_t idx;
- bool found = gu_map_lookup(map, key, &idx);
+ bool found = gu_map_lookup(map, key, SKIP_NONE, &idx);
if (!found) {
if (gu_map_maybe_resize(map)) {
- found = gu_map_lookup(map, key, &idx);
+ found = gu_map_lookup(map, key, SKIP_NONE, &idx);
gu_assert(!found);
}
if (map->hasher == gu_addr_hasher) {
@@ -277,7 +266,7 @@ gu_map_insert(GuMap* map, const void* key)
key, map->key_size);
}
if (map->default_value) {
- memcpy(&map->data.values[idx * map->value_size],
+ memcpy(&map->data.values[idx * map->cell_size],
map->default_value, map->value_size);
}
if (gu_map_entry_is_free(map, &map->data, idx)) {
@@ -286,7 +275,32 @@ gu_map_insert(GuMap* map, const void* key)
}
map->data.n_occupied++;
}
- return &map->data.values[idx * map->value_size];
+ return &map->data.values[idx * map->cell_size];
+}
+
+GU_API void
+gu_map_delete(GuMap* map, const void* key)
+{
+ size_t idx;
+ bool found = gu_map_lookup(map, key, SKIP_NONE, &idx);
+ if (found) {
+ if (map->hasher == gu_addr_hasher) {
+ ((const void**)map->data.keys)[idx] = NULL;
+ } else if (map->hasher == gu_string_hasher) {
+ ((GuString*)map->data.keys)[idx] = NULL;
+ } else {
+ memset(&map->data.keys[idx * map->key_size],
+ 0, map->key_size);
+ }
+ map->data.values[idx * map->cell_size] = SKIP_DELETED;
+
+ if (gu_map_buf_is_zero(&map->data.keys[idx * map->key_size],
+ map->key_size)) {
+ map->data.zero_idx = SIZE_MAX;
+ }
+
+ map->data.n_occupied--;
+ }
}
GU_API void
@@ -297,7 +311,7 @@ gu_map_iter(GuMap* map, GuMapItor* itor, GuExn* err)
continue;
}
const void* key = &map->data.keys[i * map->key_size];
- void* value = &map->data.values[i * map->value_size];
+ void* value = &map->data.values[i * map->cell_size];
if (map->hasher == gu_addr_hasher) {
key = *(const void* const*) key;
} else if (map->hasher == gu_string_hasher) {
@@ -307,47 +321,30 @@ gu_map_iter(GuMap* map, GuMapItor* itor, GuExn* err)
}
}
-typedef struct {
- GuEnum en;
- GuMap* ht;
- size_t i;
- GuMapKeyValue x;
-} GuMapEnum;
-
-static void
-gu_map_enum_next(GuEnum* self, void* to, GuPool* pool)
+GU_API bool
+gu_map_next(GuMap* map, size_t* pi, void** pkey, void* pvalue)
{
- *((GuMapKeyValue**) to) = NULL;
-
- size_t i;
- GuMapEnum* en = (GuMapEnum*) self;
- for (i = en->i; i < en->ht->data.n_entries; i++) {
- if (gu_map_entry_is_free(en->ht, &en->ht->data, i)) {
+ while (*pi < map->data.n_entries) {
+ if (gu_map_entry_is_free(map, &map->data, *pi)) {
+ (*pi)++;
continue;
}
- en->x.key = &en->ht->data.keys[i * en->ht->key_size];
- en->x.value = &en->ht->data.values[i * en->ht->value_size];
- if (en->ht->hasher == gu_addr_hasher) {
- en->x.key = *(const void* const*) en->x.key;
- } else if (en->ht->hasher == gu_string_hasher) {
- en->x.key = *(GuString*) en->x.key;
+
+ *pkey = &map->data.keys[*pi * map->key_size];
+ if (map->hasher == gu_addr_hasher) {
+ *pkey = *(void**) *pkey;
+ } else if (map->hasher == gu_string_hasher) {
+ *pkey = *(void**) *pkey;
}
- *((GuMapKeyValue**) to) = &en->x;
- break;
- }
-
- en->i = i+1;
-}
+ memcpy(pvalue, &map->data.values[*pi * map->cell_size],
+ map->value_size);
-GU_API GuEnum*
-gu_map_enum(GuMap* ht, GuPool* pool)
-{
- GuMapEnum* en = gu_new(GuMapEnum, pool);
- en->en.next = gu_map_enum_next;
- en->ht = ht;
- en->i = 0;
- return &en->en;
+ (*pi)++;
+ return true;
+ }
+
+ return false;
}
GU_API size_t
@@ -363,8 +360,6 @@ gu_map_count(GuMap* map)
return count;
}
-static const uint8_t gu_map_no_values[1] = { 0 };
-
GU_API GuMap*
gu_make_map(size_t key_size, GuHasher* hasher,
size_t value_size, const void* default_value,
@@ -375,7 +370,7 @@ gu_make_map(size_t key_size, GuHasher* hasher,
.n_occupied = 0,
.n_entries = 0,
.keys = NULL,
- .values = value_size ? NULL : (uint8_t*) gu_map_no_values,
+ .values = NULL,
.zero_idx = SIZE_MAX
};
GuMap* map = gu_new(GuMap, pool);
@@ -384,6 +379,7 @@ gu_make_map(size_t key_size, GuHasher* hasher,
map->data = data;
map->key_size = key_size;
map->value_size = value_size;
+ map->cell_size = GU_MAX(value_size,sizeof(uint8_t));
map->fin.fn = gu_map_finalize;
gu_pool_finally(pool, &map->fin);
diff --git a/src/runtime/c/gu/map.h b/src/runtime/c/gu/map.h
index ffd937a01..cc91a27f7 100644
--- a/src/runtime/c/gu/map.h
+++ b/src/runtime/c/gu/map.h
@@ -62,6 +62,9 @@ gu_map_has(GuMap* ht, const void* key);
GU_API_DECL void*
gu_map_insert(GuMap* ht, const void* key);
+GU_API_DECL void
+gu_map_delete(GuMap* ht, const void* key);
+
#define gu_map_put(MAP, KEYP, V, VAL) \
GU_BEGIN \
V* gu_map_put_p_ = gu_map_insert((MAP), (KEYP)); \
@@ -71,13 +74,8 @@ gu_map_insert(GuMap* ht, const void* key);
GU_API_DECL void
gu_map_iter(GuMap* ht, GuMapItor* itor, GuExn* err);
-typedef struct {
- const void* key;
- void* value;
-} GuMapKeyValue;
-
-GU_API_DECL GuEnum*
-gu_map_enum(GuMap* ht, GuPool* pool);
+GU_API bool
+gu_map_next(GuMap* map, size_t* pi, void** pkey, void* pvalue);
typedef GuMap GuIntMap;
diff --git a/src/runtime/c/gu/mem.c b/src/runtime/c/gu/mem.c
index 3e5bd57a2..80e99242e 100644
--- a/src/runtime/c/gu/mem.c
+++ b/src/runtime/c/gu/mem.c
@@ -8,6 +8,10 @@
#include
#include
#endif
+#if defined(__MINGW32__) || defined(_MSC_VER)
+#include
+#endif
+
#if !defined(_MSC_VER)
#include
#endif
@@ -108,6 +112,39 @@ gu_mem_buf_alloc(size_t min_size, size_t* real_size_out)
return gu_mem_buf_realloc(NULL, min_size, real_size_out);
}
+#if defined(__MINGW32__) || defined(_MSC_VER)
+#include
+
+static int
+getpagesize()
+{
+ SYSTEM_INFO system_info;
+ GetSystemInfo(&system_info);
+ return system_info.dwPageSize;
+}
+#endif
+
+GU_API void*
+gu_mem_page_alloc(size_t min_size, size_t* real_size_out)
+{
+ size_t page_size = getpagesize();
+ size_t size = ((min_size + page_size - 1) / page_size) * page_size;
+ void *page = NULL;
+
+#if defined(ANDROID)
+ if ((page = memalign(page_size, size)) == NULL) {
+#elif defined(__MINGW32__) || defined(_MSC_VER)
+ if ((page = malloc(size)) == NULL) {
+#else
+ if (posix_memalign(&page, page_size, size) != 0) {
+#endif
+ gu_fatal("Memory allocation failed");
+ }
+
+ *real_size_out = size;
+ return page;
+}
+
GU_API void
gu_mem_buf_free(void* buf)
{
@@ -132,6 +169,7 @@ struct GuFinalizerNode {
enum GuPoolType {
GU_POOL_HEAP,
GU_POOL_LOCAL,
+ GU_POOL_PAGE,
GU_POOL_MMAP
};
@@ -180,6 +218,16 @@ gu_new_pool(void)
return pool;
}
+GU_API GuPool*
+gu_new_page_pool(void)
+{
+ size_t sz = GU_FLEX_SIZE(GuPool, init_buf, gu_mem_pool_initial_size);
+ uint8_t* buf = gu_mem_page_alloc(sz, &sz);
+ GuPool* pool = gu_init_pool(buf, sz);
+ pool->type = GU_POOL_PAGE;
+ return pool;
+}
+
GU_API GuPool*
gu_mmap_pool(char* fpath, void* addr, size_t size, void**pptr)
{
@@ -238,7 +286,10 @@ gu_pool_expand(GuPool* pool, size_t req)
gu_mem_chunk_max_size));
gu_assert(real_req >= sizeof(GuMemChunk));
size_t size = 0;
- GuMemChunk* chunk = gu_mem_buf_alloc(real_req, &size);
+ GuMemChunk* chunk =
+ (pool->type == GU_POOL_PAGE)
+ ? gu_mem_page_alloc(real_req, &size)
+ : gu_mem_buf_alloc(real_req, &size);
chunk->next = pool->chunks;
pool->chunks = chunk;
pool->curr_buf = (uint8_t*) chunk;
@@ -309,6 +360,7 @@ gu_malloc_prefixed(GuPool* pool, size_t pre_align, size_t pre_size,
size_t full_size = gu_mem_advance(offsetof(GuMemChunk, data),
pre_align, pre_size, align, size);
if (full_size > gu_mem_max_shared_alloc &&
+ pool->type != GU_POOL_PAGE &&
pool->type != GU_POOL_MMAP) {
GuMemChunk* chunk = gu_mem_alloc(full_size);
chunk->next = pool->chunks;
diff --git a/src/runtime/c/gu/mem.h b/src/runtime/c/gu/mem.h
index 3f16a6a1c..1d4a52bf9 100644
--- a/src/runtime/c/gu/mem.h
+++ b/src/runtime/c/gu/mem.h
@@ -55,6 +55,11 @@ gu_local_pool_(uint8_t* init_buf, size_t sz);
* should not be used in the bodies of recursive functions.
*/
+/// Create a pool where each chunk is corresponds to one or
+/// more pages.
+GU_API_DECL GuPool*
+gu_new_page_pool(void);
+
/// Create a pool stored in a memory mapped file.
GU_API_DECL GuPool*
gu_mmap_pool(char* fpath, void* addr, size_t size, void**pptr);
@@ -198,6 +203,9 @@ gu_mem_buf_realloc(
size_t min_size,
size_t* real_size_out);
+/// Allocate enough memory pages to contain min_size bytes.
+GU_API_DECL void*
+gu_mem_page_alloc(size_t min_size, size_t* real_size_out);
/// Free a memory buffer.
GU_API_DECL void
diff --git a/src/runtime/c/gu/seq.c b/src/runtime/c/gu/seq.c
index 72ccc3fae..fd535caf6 100644
--- a/src/runtime/c/gu/seq.c
+++ b/src/runtime/c/gu/seq.c
@@ -100,6 +100,11 @@ gu_seq_free(GuSeq* seq)
gu_mem_buf_free(seq);
}
+static void
+gu_dummy_finalizer(GuFinalizer* self)
+{
+}
+
GU_API void
gu_buf_require(GuBuf* buf, size_t req_len)
{
@@ -109,7 +114,9 @@ gu_buf_require(GuBuf* buf, size_t req_len)
size_t req_size = sizeof(GuSeq) + buf->elem_size * req_len;
size_t real_size;
-
+
+ gu_require(buf->fin.fn != gu_dummy_finalizer);
+
if (buf->seq == NULL || buf->seq == gu_empty_seq()) {
buf->seq = gu_mem_buf_alloc(req_size, &real_size);
buf->seq->len = 0;
@@ -164,6 +171,24 @@ gu_buf_freeze(GuBuf* buf, GuPool* pool)
return seq;
}
+GU_API void
+gu_buf_evacuate(GuBuf* buf, GuPool* pool)
+{
+ if (buf->seq != gu_empty_seq()) {
+ size_t len = gu_buf_length(buf);
+
+ GuSeq* seq = gu_make_seq(buf->elem_size, len, pool);
+ void* bufdata = gu_buf_data(buf);
+ void* seqdata = gu_seq_data(seq);
+ memcpy(seqdata, bufdata, buf->elem_size * len);
+ gu_mem_buf_free(buf->seq);
+
+ buf->seq = seq;
+ buf->fin.fn = gu_dummy_finalizer;
+ buf->avail_len = len;
+ }
+}
+
GU_API void*
gu_buf_insert(GuBuf* buf, size_t index)
{
@@ -335,13 +360,8 @@ GU_API void
gu_buf_heap_pop(GuBuf *buf, GuOrder *order, void* data_out)
{
const void* last = gu_buf_trim(buf); // raises an error if empty
-
- if (gu_buf_length(buf) > 0) {
- memcpy(data_out, buf->seq->data, buf->elem_size);
- gu_heap_siftup(buf, order, last, 0);
- } else {
- memcpy(data_out, last, buf->elem_size);
- }
+ memcpy(data_out, buf->seq->data, buf->elem_size);
+ gu_heap_siftup(buf, order, last, 0);
}
GU_API void
diff --git a/src/runtime/c/gu/seq.h b/src/runtime/c/gu/seq.h
index c19a23d1c..b639369c3 100644
--- a/src/runtime/c/gu/seq.h
+++ b/src/runtime/c/gu/seq.h
@@ -182,6 +182,9 @@ gu_buf_heapify(GuBuf *buf, GuOrder *order);
GU_API_DECL GuSeq*
gu_buf_freeze(GuBuf* buf, GuPool* pool);
+
+GU_API_DECL void
+gu_buf_evacuate(GuBuf* buf, GuPool* pool);
#endif // GU_SEQ_H_
#ifdef GU_STRING_H_
diff --git a/src/runtime/c/pgf/data.h b/src/runtime/c/pgf/data.h
index 45685c82d..680c41a45 100644
--- a/src/runtime/c/pgf/data.h
+++ b/src/runtime/c/pgf/data.h
@@ -344,8 +344,9 @@ struct PgfCCat {
PgfCncFuns* linrefs;
size_t n_synprods;
PgfProductionSeq* prods;
- float viterbi_prob;
+ prob_t viterbi_prob;
int fid;
+ int chunk_count;
PgfItemConts* conts;
struct PgfAnswers* answers;
GuFinalizer fin[0];
diff --git a/src/runtime/c/pgf/expr.h b/src/runtime/c/pgf/expr.h
index 44fe440ae..6e2bd68de 100644
--- a/src/runtime/c/pgf/expr.h
+++ b/src/runtime/c/pgf/expr.h
@@ -198,16 +198,16 @@ pgf_literal_hash(GuHash h, PgfLiteral lit);
PGF_API_DECL GuHash
pgf_expr_hash(GuHash h, PgfExpr e);
-PGF_API size_t
+PGF_API_DECL size_t
pgf_expr_size(PgfExpr expr);
-PGF_API GuSeq*
+PGF_API_DECL GuSeq*
pgf_expr_functions(PgfExpr expr, GuPool* pool);
-PGF_API PgfExpr
+PGF_API_DECL PgfExpr
pgf_expr_substitute(PgfExpr expr, GuSeq* meta_values, GuPool* pool);
-PGF_API PgfType*
+PGF_API_DECL PgfType*
pgf_type_substitute(PgfType* type, GuSeq* meta_values, GuPool* pool);
typedef struct PgfPrintContext PgfPrintContext;
diff --git a/src/runtime/c/pgf/jit.c b/src/runtime/c/pgf/jit.c
index 1eda95a0d..6c8679523 100644
--- a/src/runtime/c/pgf/jit.c
+++ b/src/runtime/c/pgf/jit.c
@@ -5,9 +5,6 @@
#include
#include
#include "lightning.h"
-#if defined(__MINGW32__) || defined(_MSC_VER)
-#include
-#endif
//#define PGF_JIT_DEBUG
@@ -43,18 +40,6 @@ typedef struct {
#define JIT_VSTATE JIT_V1
#define JIT_VCLOS JIT_V2
-#if defined(__MINGW32__) || defined(_MSC_VER)
-#include
-
-static int
-getpagesize()
-{
- SYSTEM_INFO system_info;
- GetSystemInfo(&system_info);
- return system_info.dwPageSize;
-}
-#endif
-
static void
pgf_jit_finalize_page(GuFinalizer* self)
@@ -65,19 +50,8 @@ pgf_jit_finalize_page(GuFinalizer* self)
static void
pgf_jit_alloc_page(PgfReader* rdr)
{
- void *page;
-
- size_t page_size = getpagesize();
-
-#if defined(ANDROID)
- if ((page = memalign(page_size, page_size)) == NULL) {
-#elif defined(__MINGW32__) || defined(_MSC_VER)
- if ((page = malloc(page_size)) == NULL) {
-#else
- if (posix_memalign(&page, page_size, page_size) != 0) {
-#endif
- gu_fatal("Memory allocation failed");
- }
+ size_t page_size;
+ void *page = gu_mem_page_alloc(sizeof(GuFinalizer), &page_size);
GuFinalizer* fin = page;
fin->fn = pgf_jit_finalize_page;
diff --git a/src/runtime/c/pgf/parser.c b/src/runtime/c/pgf/parser.c
index 0f5f6d1ac..550747e65 100644
--- a/src/runtime/c/pgf/parser.c
+++ b/src/runtime/c/pgf/parser.c
@@ -1,6 +1,5 @@
#include
#include
-#include
#include
#include
#include
@@ -30,6 +29,7 @@ struct PgfItemConts {
typedef GuSeq PgfItemContss;
typedef GuMap PgfContsMap;
typedef GuMap PgfGenCatMap;
+typedef GuMap PgfChunksMap;
typedef GuBuf PgfCCatBuf;
@@ -49,7 +49,6 @@ typedef struct {
#ifdef PGF_COUNTS_DEBUG
int item_full_count;
int item_real_count;
- int cont_full_count;
int ccat_full_count;
int prod_full_count;
#endif
@@ -62,40 +61,39 @@ typedef struct {
typedef enum { BIND_NONE, BIND_HARD, BIND_SOFT } BIND_TYPE;
-typedef struct {
- PgfProductionIdx* idx;
- size_t offset;
-} PgfLexiconIdxEntry;
-
-typedef GuBuf PgfLexiconIdx;
-
struct PgfParseState {
PgfParseState* next;
PgfItemBuf* agenda;
PgfContsMap* conts_map;
PgfGenCatMap* generated_cats;
+ PgfChunksMap* chunks_map;
bool needs_bind;
size_t start_offset;
size_t end_offset;
prob_t viterbi_prob;
-
- PgfLexiconIdx* lexicon_idx;
};
typedef struct PgfAnswers {
GuBuf* conts;
GuBuf* exprs;
+ PgfCCat* ccat;
prob_t outside_prob;
} PgfAnswers;
+#define PGF_EXPR_CHUNK_STATE ((size_t) -1)
+
typedef struct {
PgfAnswers* answers;
PgfExprProb ep;
- PgfPArgs* args;
- size_t arg_idx;
+ union {
+ PgfPArgs* args;
+ PgfParseState* state;
+ };
+ size_t arg_idx; // if the value is PGF_EXPR_CHUNK_STATE, then
+ // the relevant value above is state, not args.
} PgfExprState;
typedef struct PgfItemBase PgfItemBase;
@@ -281,73 +279,13 @@ pgf_item_symbols(PgfItem* item,
}
}
-static void
+PGF_INTERNAL void
pgf_print_production_args(PgfPArgs* args,
- GuOut* out, GuExn* err)
-{
- size_t n_args = gu_seq_length(args);
- for (size_t j = 0; j < n_args; j++) {
- if (j > 0)
- gu_putc(',',out,err);
+ GuOut* out, GuExn* err);
- PgfPArg arg = gu_seq_get(args, PgfPArg, j);
-
- if (arg.hypos != NULL &&
- gu_seq_length(arg.hypos) > 0) {
- size_t n_hypos = gu_seq_length(arg.hypos);
- for (size_t k = 0; k < n_hypos; k++) {
- PgfCCat *hypo = gu_seq_get(arg.hypos, PgfCCat*, k);
- pgf_print_fid(hypo->fid, out, err);
- gu_putc(' ',out,err);
- }
- gu_puts("-> ",out,err);
- }
-
- pgf_print_fid(arg.ccat->fid, out, err);
- }
-}
-
-static void
-pgf_print_production(int fid, PgfProduction prod,
- GuOut *out, GuExn* err, GuPool* pool)
-{
- pgf_print_fid(fid, out, err);
- gu_puts(" -> ", out, err);
-
- GuVariantInfo i = gu_variant_open(prod);
- switch (i.tag) {
- case PGF_PRODUCTION_APPLY: {
- PgfProductionApply* papp = i.data;
- gu_printf(out,err,"F%d(",papp->fun->funid);
- if (papp->fun->ep != NULL) {
- pgf_print_expr(papp->fun->ep->expr, NULL, 0, out, err);
- } else {
- PgfPArg* parg = gu_seq_index(papp->args, PgfPArg, 0);
- gu_printf(out,err,"linref %s", parg->ccat->cnccat->abscat->name);
- }
- gu_printf(out,err,")[");
- pgf_print_production_args(papp->args,out,err);
- gu_printf(out,err,"]\n");
- break;
- }
- case PGF_PRODUCTION_COERCE: {
- PgfProductionCoerce* pcoerce = i.data;
- gu_puts("_[",out,err);
- pgf_print_fid(pcoerce->coerce->fid, out, err);
- gu_printf("]\n",out,err);
- break;
- }
- case PGF_PRODUCTION_EXTERN: {
- PgfProductionExtern* pext = i.data;
- gu_printf(out,err,"(");
- pgf_print_expr(pext->ep->expr, NULL, 0, out, err);
- gu_printf(out,err,")[]\n");
- break;
- }
- default:
- gu_impossible();
- }
-}
+PGF_INTERNAL void
+pgf_print_production(int fid, PgfProduction prod,
+ GuOut *out, GuExn* err);
static void
pgf_print_item_seq(PgfItem *item,
@@ -441,7 +379,9 @@ static void
pgf_print_expr_state(PgfExprState* st,
GuOut* out, GuExn* err, GuBuf* stack)
{
- gu_buf_push(stack, int, (gu_seq_length(st->args) - st->arg_idx - 1));
+ gu_buf_push(stack, int,
+ (st->arg_idx != PGF_EXPR_CHUNK_STATE) ?
+ (gu_seq_length(st->args) - st->arg_idx - 1) : 0);
if (gu_buf_length(st->answers->conts) > 0) {
PgfExprState* cont = gu_buf_get(st->answers->conts, PgfExprState*, 0);
@@ -450,6 +390,10 @@ pgf_print_expr_state(PgfExprState* st,
}
gu_puts(" (", out, err);
+ if (st->answers->ccat != NULL) {
+ pgf_print_fid(st->answers->ccat->fid,out,err);
+ gu_puts(":", out, err);
+ }
if (gu_variant_is_null(st->ep.expr))
gu_puts("_", out, err);
else
@@ -465,7 +409,8 @@ pgf_print_expr_state0(PgfExprState* st,
st->answers->outside_prob,
st->answers->outside_prob+st->ep.prob);
- size_t n_args = gu_seq_length(st->args);
+ size_t n_args = (st->arg_idx == PGF_EXPR_CHUNK_STATE) ?
+ 0 : gu_seq_length(st->args);
GuBuf* stack = gu_new_buf(int, tmp_pool);
if (n_args > 0)
@@ -493,7 +438,7 @@ pgf_print_expr_state0(PgfExprState* st,
int count = gu_buf_get(stack, int, i);
while (count-- > 0)
gu_puts(" ?", out, err);
-
+
gu_puts(")", out, err);
}
gu_puts("\n", out, err);
@@ -501,39 +446,12 @@ pgf_print_expr_state0(PgfExprState* st,
#endif
#endif
-static int
-cmp_string(GuString* psent, GuString tok, bool case_sensitive)
-{
- for (;;) {
- GuUCS c2 = gu_utf8_decode((const uint8_t**) &tok);
- if (c2 == 0)
- return 0;
+PGF_INTERNAL_DECL int
+cmp_string(PgfCohortSpot* spot, GuString tok,
+ bool case_sensitive);
- const uint8_t* p = (uint8_t*) *psent;
- GuUCS c1 = gu_utf8_decode(&p);
- if (c1 == 0)
- return -1;
-
- if (!case_sensitive)
- c1 = gu_ucs_to_lower(c1);
-
- if (c1 != c2)
- return (c1-c2);
-
- *psent = (GuString) p;
- }
-}
-
-static bool
-skip_space(GuString* psent)
-{
- const uint8_t* p = (uint8_t*) *psent;
- if (!gu_ucs_is_space(gu_utf8_decode(&p)))
- return false;
-
- *psent = (GuString) p;
- return true;
-}
+PGF_INTERNAL_DECL bool
+skip_space(GuString* psent, size_t* ppos);
static int
cmp_item_prob(GuOrder* self, const void* a, const void* b)
@@ -605,12 +523,6 @@ pgf_parsing_get_conts(PgfParseState* state,
conts->outside_prob = 0;
conts->ref_count = 0;
gu_seq_get(contss, PgfItemConts*, lin_idx) = conts;
-
-#ifdef PGF_COUNTS_DEBUG
- if (state != NULL) {
- state->ps->cont_full_count++;
- }
-#endif
}
return conts;
}
@@ -624,7 +536,7 @@ gu_ccat_fini(GuFinalizer* fin)
}
static PgfCCat*
-pgf_parsing_create_completed(PgfParsing* ps, PgfParseState* state,
+pgf_parsing_create_completed(PgfParsing* ps, PgfParseState* state,
PgfItemConts* conts,
prob_t viterbi_prob)
{
@@ -634,17 +546,20 @@ pgf_parsing_create_completed(PgfParsing* ps, PgfParseState* state,
cat->linrefs = conts->ccat->linrefs;
cat->viterbi_prob = viterbi_prob;
cat->fid = ps->max_fid++;
+ cat->chunk_count = (conts->ccat->fid == -5 ||
+ conts->state->end_offset == state->end_offset);
cat->conts = conts;
cat->answers = NULL;
cat->prods = NULL;
cat->n_synprods = 0;
+
gu_map_put(state->generated_cats, conts, PgfCCat*, cat);
-
+
cat->fin[0].fn = gu_ccat_fini;
gu_pool_finally(ps->pool, cat->fin);
#ifdef PGF_COUNTS_DEBUG
- state->ps->ccat_full_count++;
+ ps->ccat_full_count++;
#endif
return cat;
@@ -686,6 +601,19 @@ pgf_item_set_curr_symbol(PgfItem* item, GuPool* pool)
break;
}
case PGF_PRODUCTION_EXTERN: {
+ PgfProductionExtern* pext = i.data;
+
+ PgfSymbols* syms;
+ if (pext->lins != NULL &&
+ (syms = gu_seq_get(pext->lins,PgfSymbols*,item->conts->lin_idx)) != NULL) {
+ if (item->sym_idx == gu_seq_length(syms)) {
+ item->curr_sym = gu_null_variant;
+ } else {
+ item->curr_sym = gu_seq_get(syms, PgfSymbol, item->sym_idx);
+ }
+ } else {
+ item->curr_sym = gu_null_variant;
+ }
break;
}
default:
@@ -759,7 +687,7 @@ static PgfItem*
pgf_item_copy(PgfItem* item, PgfParsing* ps)
{
PgfItem* copy;
- if (ps == NULL || ps->free_item == NULL)
+ if (ps->free_item == NULL)
copy = gu_new(PgfItem, ps->pool);
else {
copy = ps->free_item;
@@ -768,10 +696,8 @@ pgf_item_copy(PgfItem* item, PgfParsing* ps)
memcpy(copy, item, sizeof(PgfItem));
#ifdef PGF_COUNTS_DEBUG
- if (ps != NULL) {
- ps->item_full_count++;
- ps->item_real_count++;
- }
+ ps->item_full_count++;
+ ps->item_real_count++;
#endif
item->conts->ref_count++;
@@ -844,13 +770,36 @@ pgf_item_free(PgfParsing* ps, PgfItem* item)
}
static void
-pgf_result_predict(PgfParsing* ps,
- PgfExprState* cont, PgfCCat* ccat);
+pgf_result_predict(PgfParsing* ps,
+ PgfExprState* cont, PgfCCat* ccat,
+ prob_t outside_prob);
static void
pgf_result_production(PgfParsing* ps,
PgfAnswers* answers, PgfProduction prod);
+static void
+pgf_parsing_complete(PgfParsing* ps, PgfItem* item, PgfExprProb *ep);
+
+static void
+pgf_parsing_push_item(PgfParseState* state, PgfItem* item)
+{
+ if (gu_buf_length(state->agenda) == 0) {
+ state->viterbi_prob =
+ item->inside_prob+item->conts->outside_prob;
+ }
+ gu_buf_heap_push(state->agenda, pgf_item_prob_order, &item);
+}
+
+static void
+pgf_parsing_push_production(PgfParsing* ps, PgfParseState* state,
+ PgfItemConts* conts, PgfProduction prod)
+{
+ PgfItem* item =
+ pgf_new_item(ps, conts, prod);
+ gu_buf_heap_push(state->agenda, pgf_item_prob_order, &item);
+}
+
static void
pgf_parsing_combine(PgfParsing* ps,
PgfParseState* before, PgfParseState* after,
@@ -873,16 +822,7 @@ pgf_parsing_combine(PgfParsing* ps,
}
pgf_item_advance(item, ps->pool);
- gu_buf_heap_push(before->agenda, pgf_item_prob_order, &item);
-}
-
-static void
-pgf_parsing_production(PgfParsing* ps, PgfParseState* state,
- PgfItemConts* conts, PgfProduction prod)
-{
- PgfItem* item =
- pgf_new_item(ps, conts, prod);
- gu_buf_heap_push(state->agenda, pgf_item_prob_order, &item);
+ pgf_parsing_push_item(before, item);
}
static PgfProduction
@@ -946,6 +886,10 @@ pgf_parsing_new_production(PgfItem* item, PgfExprProb *ep, GuPool *pool)
default:
gu_impossible();
}
+
+#ifdef PGF_COUNTS_DEBUG
+ ps->prod_full_count++;
+#endif
return prod;
}
@@ -964,9 +908,6 @@ pgf_parsing_complete(PgfParsing* ps, PgfItem* item, PgfExprProb *ep)
PgfProduction prod =
pgf_parsing_new_production(item, ep, ps->pool);
-#ifdef PGF_COUNTS_DEBUG
- ps->prod_full_count++;
-#endif
PgfCCat* tmp_ccat = pgf_parsing_get_completed(ps->before, item->conts);
PgfCCat* ccat = tmp_ccat;
@@ -991,18 +932,40 @@ pgf_parsing_complete(PgfParsing* ps, PgfItem* item, PgfExprProb *ep)
gu_printf(out, err, "; %d; ",
item->conts->lin_idx);
pgf_print_fid(ccat->fid, out, err);
- gu_puts("]\n", out, err);
+ gu_puts("] ", out, err);
+ pgf_print_fid(ccat->fid, out, err);
+ gu_printf(out, err, ".chunk_count=%d\n", ccat->chunk_count);
}
- pgf_print_production(ccat->fid, prod, out, err, tmp_pool);
+ pgf_print_production(ccat->fid, prod, out, err);
gu_pool_free(tmp_pool);
#endif
if (item->conts->ccat->fid == -5) {
if (ps->before->end_offset == strlen(ps->sentence)) {
PgfPArg* parg = gu_seq_index(item->args, PgfPArg, 0);
- pgf_result_predict(ps, NULL, parg->ccat);
+ pgf_result_predict(ps, NULL, parg->ccat, 0);
}
return;
+ } else {
+ size_t i = gu_seq_length(item->args);
+ while (i > 0) {
+ PgfPArg* parg = gu_seq_index(item->args, PgfPArg, i-1);
+
+ if (pgf_parsing_get_completed(ps->before, parg->ccat->conts) != NULL) {
+ parg->ccat->chunk_count++;
+
+#ifdef PGF_PARSER_DEBUG
+ GuPool* tmp_pool = gu_new_pool();
+ GuOut* out = gu_file_out(stderr, tmp_pool);
+ GuExn* err = gu_exn(tmp_pool);
+ pgf_print_fid(parg->ccat->fid, out, err);
+ gu_printf(out, err, ".chunk_count=%d\n", parg->ccat->chunk_count);
+ gu_pool_free(tmp_pool);
+#endif
+ }
+
+ i--;
+ }
}
if (tmp_ccat != NULL) {
@@ -1018,7 +981,7 @@ pgf_parsing_complete(PgfParsing* ps, PgfItem* item, PgfExprProb *ep)
* production immediately to the agenda,
* i.e. process it. */
if (conts2) {
- pgf_parsing_production(ps, ps->before, conts2, prod);
+ pgf_parsing_push_production(ps, ps->before, conts2, prod);
}
}
}
@@ -1039,7 +1002,7 @@ pgf_parsing_complete(PgfParsing* ps, PgfItem* item, PgfExprProb *ep)
* production immediately to the agenda,
* i.e. process it. */
if (conts2) {
- pgf_parsing_production(ps, state, conts2, prod);
+ pgf_parsing_push_production(ps, state, conts2, prod);
}
}
}
@@ -1059,114 +1022,9 @@ pgf_parsing_complete(PgfParsing* ps, PgfItem* item, PgfExprProb *ep)
}
}
-static int
-pgf_symbols_cmp(GuString* psent, PgfSymbols* syms, bool case_sensitive)
-{
- size_t n_syms = gu_seq_length(syms);
- for (size_t i = 0; i < n_syms; i++) {
- PgfSymbol sym = gu_seq_get(syms, PgfSymbol, i);
-
- if (i > 0) {
- if (!skip_space(psent)) {
- if (**psent == 0)
- return -1;
- return 1;
- }
-
- while (**psent != 0) {
- if (!skip_space(psent))
- break;
- }
- }
-
- GuVariantInfo inf = gu_variant_open(sym);
- switch (inf.tag) {
- case PGF_SYMBOL_CAT:
- case PGF_SYMBOL_LIT:
- case PGF_SYMBOL_VAR: {
- if (**psent == 0)
- return -1;
- return 1;
- }
- case PGF_SYMBOL_KS: {
- PgfSymbolKS* pks = inf.data;
- if (**psent == 0)
- return -1;
-
- int cmp = cmp_string(psent, pks->token, case_sensitive);
- if (cmp != 0)
- return cmp;
- break;
- }
- case PGF_SYMBOL_KP:
- case PGF_SYMBOL_BIND:
- case PGF_SYMBOL_NE:
- case PGF_SYMBOL_SOFT_BIND:
- case PGF_SYMBOL_SOFT_SPACE:
- case PGF_SYMBOL_CAPIT:
- case PGF_SYMBOL_ALL_CAPIT: {
- return -1;
- }
- default:
- gu_impossible();
- }
- }
-
- return 0;
-}
-
-static void
-pgf_parsing_lookahead(PgfParsing *ps, PgfParseState* state,
- int i, int j, ptrdiff_t min, ptrdiff_t max)
-{
- // This is a variation of a binary search algorithm which
- // can retrieve all prefixes of a string with minimal
- // comparisons, i.e. there is no need to lookup every
- // prefix separately.
-
- while (i <= j) {
- int k = (i+j) / 2;
- PgfSequence* seq = gu_seq_index(ps->concr->sequences, PgfSequence, k);
-
- GuString start = ps->sentence + state->end_offset;
- GuString current = start;
- int cmp = pgf_symbols_cmp(¤t, seq->syms, ps->case_sensitive);
- if (cmp < 0) {
- j = k-1;
- } else if (cmp > 0) {
- ptrdiff_t len = current - start;
-
- if (min <= len)
- pgf_parsing_lookahead(ps, state, i, k-1, min, len);
-
- if (len+1 <= max)
- pgf_parsing_lookahead(ps, state, k+1, j, len+1, max);
-
- break;
- } else {
- ptrdiff_t len = current - start;
-
- if (min <= len-1)
- pgf_parsing_lookahead(ps, state, i, k-1, min, len-1);
-
- if (seq->idx != NULL) {
- PgfLexiconIdxEntry* entry = gu_buf_extend(state->lexicon_idx);
- entry->idx = seq->idx;
- entry->offset = (size_t) (current - ps->sentence);
- }
-
- if (len+1 <= max)
- pgf_parsing_lookahead(ps, state, k+1, j, len+1, max);
-
- break;
- }
- }
-}
-
static PgfParseState*
pgf_new_parse_state(PgfParsing* ps, size_t start_offset,
- BIND_TYPE bind_type,
- prob_t viterbi_prob)
+ BIND_TYPE bind_type)
{
PgfParseState** pstate;
if (ps->before == NULL && start_offset == 0)
@@ -1201,7 +1059,8 @@ pgf_new_parse_state(PgfParsing* ps, size_t start_offset,
size_t end_offset = start_offset;
GuString current = ps->sentence + end_offset;
- while (skip_space(¤t)) {
+ size_t pos = 0;
+ while (skip_space(¤t, &pos)) {
end_offset++;
}
@@ -1213,46 +1072,183 @@ pgf_new_parse_state(PgfParsing* ps, size_t start_offset,
state->agenda = gu_new_buf(PgfItem*, ps->pool);
state->generated_cats = gu_new_addr_map(PgfItemConts*, PgfCCat*, &gu_null_struct, ps->pool);
state->conts_map = gu_new_addr_map(PgfCCat*, PgfItemContss*, &gu_null_struct, ps->pool);
+ state->chunks_map = NULL;
state->needs_bind = (bind_type == BIND_NONE) &&
(start_offset == end_offset);
state->start_offset = start_offset;
state->end_offset = end_offset;
- state->viterbi_prob = viterbi_prob;
- state->lexicon_idx =
- gu_new_buf(PgfLexiconIdxEntry, ps->pool);
+ state->viterbi_prob = 0;
if (ps->before == NULL && start_offset == 0)
state->needs_bind = false;
- if (gu_seq_length(ps->concr->sequences) > 0) {
- // Add epsilon lexical rules to the bottom up index
- PgfSequence* seq = gu_seq_index(ps->concr->sequences, PgfSequence, 0);
- if (gu_seq_length(seq->syms) == 0 && seq->idx != NULL) {
- PgfLexiconIdxEntry* entry = gu_buf_extend(state->lexicon_idx);
- entry->idx = seq->idx;
- entry->offset = state->start_offset;
- }
-
- // Add non-epsilon lexical rules to the bottom up index
- if (!state->needs_bind) {
- pgf_parsing_lookahead(ps, state,
- 0, gu_seq_length(ps->concr->sequences)-1,
- 1, strlen(ps->sentence)-state->end_offset);
- }
- }
-
-
*pstate = state;
return state;
}
+PGF_INTERNAL_DECL int
+pgf_symbols_cmp(PgfCohortSpot* spot,
+ PgfSymbols* syms, size_t* sym_idx,
+ bool case_sensitive);
+
+static bool
+pgf_parsing_scan_helper(PgfParsing *ps, PgfParseState* state,
+ int i, int j, ptrdiff_t min, ptrdiff_t max)
+{
+ // This is a variation of a binary search algorithm which
+ // can retrieve all prefixes of a string with minimal
+ // comparisons, i.e. there is no need to lookup every
+ // prefix separately.
+
+ bool found = false;
+ while (i <= j) {
+ int k = (i+j) / 2;
+ PgfSequence* seq = gu_seq_index(ps->concr->sequences, PgfSequence, k);
+
+ PgfCohortSpot start = {0, ps->sentence+state->end_offset};
+ PgfCohortSpot current = start;
+
+ size_t sym_idx = 0;
+ int cmp = pgf_symbols_cmp(¤t, seq->syms, &sym_idx, ps->case_sensitive);
+ if (cmp < 0) {
+ j = k-1;
+ } else if (cmp > 0) {
+ ptrdiff_t len = current.ptr - start.ptr;
+
+ if (min <= len)
+ if (pgf_parsing_scan_helper(ps, state, i, k-1, min, len))
+ found = true;
+
+ if (len+1 <= max)
+ if (pgf_parsing_scan_helper(ps, state, k+1, j, len+1, max))
+ found = true;
+
+ break;
+ } else {
+ ptrdiff_t len = current.ptr - start.ptr;
+
+ if (min <= len)
+ if (pgf_parsing_scan_helper(ps, state, i, k-1, min, len))
+ found = true;
+
+ // Here we do bottom-up prediction for all lexical categories.
+ // The epsilon productions will be predicted in top-down
+ // fashion while parsing.
+ if (seq->idx != NULL && len > 0) {
+ found = true;
+
+ // A new state will mark the end of the current match
+ PgfParseState* new_state =
+ pgf_new_parse_state(ps, (size_t) (current.ptr - ps->sentence), BIND_NONE);
+
+ // Bottom-up prediction for lexical rules
+ size_t n_entries = gu_buf_length(seq->idx);
+ for (size_t i = 0; i < n_entries; i++) {
+ PgfProductionIdxEntry* entry =
+ gu_buf_index(seq->idx, PgfProductionIdxEntry, i);
+
+ PgfItemConts* conts =
+ pgf_parsing_get_conts(state,
+ entry->ccat, entry->lin_idx,
+ ps->pool);
+
+ // Create the new category if it doesn't exist yet
+ PgfCCat* tmp_ccat = pgf_parsing_get_completed(new_state, conts);
+ PgfCCat* ccat = tmp_ccat;
+ if (ccat == NULL) {
+ ccat = pgf_parsing_create_completed(ps, new_state, conts, INFINITY);
+ }
+
+ // Add the production
+ if (ccat->prods == NULL || ccat->n_synprods >= gu_seq_length(ccat->prods)) {
+ ccat->prods = gu_realloc_seq(ccat->prods, PgfProduction, ccat->n_synprods+1);
+ }
+ GuVariantInfo i;
+ i.tag = PGF_PRODUCTION_APPLY;
+ i.data = entry->papp;
+ PgfProduction prod = gu_variant_close(i);
+ gu_seq_set(ccat->prods, PgfProduction, ccat->n_synprods++, prod);
+
+ // Update the category's probability to be minimum
+ if (ccat->viterbi_prob > entry->papp->fun->ep->prob)
+ ccat->viterbi_prob = entry->papp->fun->ep->prob;
+
+#ifdef PGF_PARSER_DEBUG
+ GuPool* tmp_pool = gu_new_pool();
+ GuOut* out = gu_file_out(stderr, tmp_pool);
+ GuExn* err = gu_exn(tmp_pool);
+ if (tmp_ccat == NULL) {
+ gu_printf(out, err, "[");
+ pgf_print_range(state, new_state, out, err);
+ gu_puts("; ", out, err);
+ pgf_print_fid(conts->ccat->fid, out, err);
+ gu_printf(out, err, "; %d; ",
+ conts->lin_idx);
+ pgf_print_fid(ccat->fid, out, err);
+ gu_puts("] ", out, err);
+ pgf_print_fid(ccat->fid, out, err);
+ gu_printf(out, err, ".chunk_count=%d\n", ccat->chunk_count);
+ }
+ pgf_print_production(ccat->fid, prod, out, err);
+ gu_pool_free(tmp_pool);
+#endif
+ }
+ }
+
+ if (len <= max)
+ if (pgf_parsing_scan_helper(ps, state, k+1, j, len, max))
+ found = true;
+
+ break;
+ }
+ }
+
+ return found;
+}
+
+static void
+pgf_parsing_scan(PgfParsing *ps)
+{
+ size_t len = strlen(ps->sentence);
+
+ PgfParseState* state =
+ pgf_new_parse_state(ps, 0, BIND_SOFT);
+
+ while (state != NULL && state->end_offset < len) {
+ if (state->needs_bind) {
+ // We have encountered two tokens without space in between.
+ // Those can be accepted only if there is a BIND token
+ // in between. We encode this by having one more state
+ // at the same offset. A transition between these two
+ // states is possible only with the BIND token.
+ state =
+ pgf_new_parse_state(ps, state->end_offset, BIND_HARD);
+ }
+
+ if (!pgf_parsing_scan_helper
+ (ps, state,
+ 0, gu_seq_length(ps->concr->sequences)-1,
+ 1, len-state->end_offset)) {
+ // skip one character and try again
+ GuString s = ps->sentence+state->end_offset;
+ gu_utf8_decode((const uint8_t**) &s);
+ pgf_new_parse_state(ps, s-ps->sentence, BIND_NONE);
+ }
+
+ if (state == ps->before)
+ state = ps->after;
+ else
+ state = state->next;
+ }
+}
+
static void
pgf_parsing_add_transition(PgfParsing* ps, PgfToken tok, PgfItem* item)
-{
- GuString current = ps->sentence + ps->before->end_offset;
+{
+ PgfCohortSpot current = {0, ps->sentence + ps->before->end_offset};
- if (ps->prefix != NULL && *current == 0) {
+ if (ps->prefix != NULL && *current.ptr == 0) {
if (gu_string_is_prefix(ps->prefix, tok)) {
PgfProductionApply* papp = gu_variant_data(item->prod);
@@ -1265,10 +1261,9 @@ pgf_parsing_add_transition(PgfParsing* ps, PgfToken tok, PgfItem* item)
} else {
if (!ps->before->needs_bind && cmp_string(¤t, tok, ps->case_sensitive) == 0) {
PgfParseState* state =
- pgf_new_parse_state(ps, (current - ps->sentence),
- BIND_NONE,
- item->inside_prob+item->conts->outside_prob);
- gu_buf_heap_push(state->agenda, pgf_item_prob_order, &item);
+ pgf_new_parse_state(ps, (current.ptr - ps->sentence),
+ BIND_NONE);
+ pgf_parsing_push_item(state, item);
} else {
pgf_item_free(ps, item);
}
@@ -1288,7 +1283,7 @@ pgf_parsing_predict_lexeme(PgfParsing* ps, PgfItemConts* conts,
item->sym_idx = gu_seq_length(syms);
prob_t prob = item->inside_prob+item->conts->outside_prob;
PgfParseState* state =
- pgf_new_parse_state(ps, offset, BIND_NONE, prob);
+ pgf_new_parse_state(ps, offset, BIND_NONE);
if (state->viterbi_prob > prob) {
state->viterbi_prob = prob;
}
@@ -1332,44 +1327,46 @@ pgf_parsing_td_predict(PgfParsing* ps,
for (size_t i = 0; i < n_prods; i++) {
PgfProduction prod =
gu_seq_get(ccat->prods, PgfProduction, i);
- pgf_parsing_production(ps, ps->before, conts, prod);
+ pgf_parsing_push_production(ps, ps->before, conts, prod);
}
} else {
// Top-down prediction for syntactic rules
for (size_t i = 0; i < ccat->n_synprods; i++) {
PgfProduction prod =
gu_seq_get(ccat->prods, PgfProduction, i);
- pgf_parsing_production(ps, ps->before, conts, prod);
+ pgf_parsing_push_production(ps, ps->before, conts, prod);
}
- // Bottom-up prediction for lexical and epsilon rules
- size_t n_idcs = gu_buf_length(ps->before->lexicon_idx);
- for (size_t i = 0; i < n_idcs; i++) {
- PgfLexiconIdxEntry* lentry =
- gu_buf_index(ps->before->lexicon_idx, PgfLexiconIdxEntry, i);
+ // Top-down prediction for epsilon lexical rules if any
+ PgfSequence* seq = gu_seq_index(ps->concr->sequences, PgfSequence, 0);
+ if (gu_seq_length(seq->syms) == 0 && seq->idx != NULL) {
PgfProductionIdxEntry key;
key.ccat = ccat;
key.lin_idx = lin_idx;
key.papp = NULL;
PgfProductionIdxEntry* value =
- gu_seq_binsearch(gu_buf_data_seq(lentry->idx),
+ gu_seq_binsearch(gu_buf_data_seq(seq->idx),
pgf_production_idx_entry_order,
PgfProductionIdxEntry, &key);
if (value != NULL) {
- pgf_parsing_predict_lexeme(ps, conts, value, lentry->offset);
+ GuVariantInfo i = { PGF_PRODUCTION_APPLY, value->papp };
+ PgfProduction prod = gu_variant_close(i);
+ pgf_parsing_push_production(ps, ps->before, conts, prod);
PgfProductionIdxEntry* start =
- gu_buf_data(lentry->idx);
+ gu_buf_data(seq->idx);
PgfProductionIdxEntry* end =
- start + gu_buf_length(lentry->idx)-1;
+ start + gu_buf_length(seq->idx)-1;
PgfProductionIdxEntry* left = value-1;
while (left >= start &&
value->ccat->fid == left->ccat->fid &&
value->lin_idx == left->lin_idx) {
- pgf_parsing_predict_lexeme(ps, conts, left, lentry->offset);
+ GuVariantInfo i = { PGF_PRODUCTION_APPLY, left->papp };
+ PgfProduction prod = gu_variant_close(i);
+ pgf_parsing_push_production(ps, ps->before, conts, prod);
left--;
}
@@ -1377,31 +1374,32 @@ pgf_parsing_td_predict(PgfParsing* ps,
while (right <= end &&
value->ccat->fid == right->ccat->fid &&
value->lin_idx == right->lin_idx) {
- pgf_parsing_predict_lexeme(ps, conts, right, lentry->offset);
+ GuVariantInfo i = { PGF_PRODUCTION_APPLY, right->papp };
+ PgfProduction prod = gu_variant_close(i);
+ pgf_parsing_push_production(ps, ps->before, conts, prod);
right++;
}
}
}
}
- } else {
- /* If it has already been completed, combine. */
+ }
+ /* If the category has already been completed, combine. */
+ PgfCCat* completed =
+ pgf_parsing_get_completed(ps->before, conts);
+ if (completed) {
+ pgf_parsing_combine(ps, ps->before, ps->after, item, completed, lin_idx);
+ }
+
+ PgfParseState* state = ps->after;
+ while (state != NULL) {
PgfCCat* completed =
- pgf_parsing_get_completed(ps->before, conts);
+ pgf_parsing_get_completed(state, conts);
if (completed) {
- pgf_parsing_combine(ps, ps->before, ps->after, item, completed, lin_idx);
+ pgf_parsing_combine(ps, state, state->next, item, completed, lin_idx);
}
- PgfParseState* state = ps->after;
- while (state != NULL) {
- PgfCCat* completed =
- pgf_parsing_get_completed(state, conts);
- if (completed) {
- pgf_parsing_combine(ps, state, state->next, item, completed, lin_idx);
- }
-
- state = state->next;
- }
+ state = state->next;
}
}
@@ -1417,7 +1415,7 @@ pgf_parsing_pre(PgfParsing* ps, PgfItem* item, PgfSymbols* syms)
} else {
item->alt = 0;
pgf_item_advance(item, ps->pool);
- gu_buf_heap_push(ps->before->agenda, pgf_item_prob_order, &item);
+ pgf_parsing_push_item(ps->before, item);
}
}
@@ -1447,7 +1445,6 @@ pgf_parsing_symbol(PgfParsing* ps, PgfItem* item, PgfSymbol sym)
case PGF_SYMBOL_KP: {
PgfSymbolKP* skp = gu_variant_data(sym);
- PgfSymbol sym;
if (item->alt == 0) {
PgfItem* new_item;
@@ -1537,9 +1534,8 @@ pgf_parsing_symbol(PgfParsing* ps, PgfItem* item, PgfSymbol sym)
item->curr_sym = pgf_collect_extern_tok(ps,start,offset);
item->sym_idx = pgf_item_symbols_length(item);
PgfParseState* state =
- pgf_new_parse_state(ps, offset, BIND_NONE,
- item->inside_prob+item->conts->outside_prob);
- gu_buf_heap_push(state->agenda, pgf_item_prob_order, &item);
+ pgf_new_parse_state(ps, offset, BIND_NONE);
+ pgf_parsing_push_item(state, item);
match = true;
}
}
@@ -1582,11 +1578,10 @@ pgf_parsing_symbol(PgfParsing* ps, PgfItem* item, PgfSymbol sym)
if (ps->before->start_offset == ps->before->end_offset &&
ps->before->needs_bind) {
PgfParseState* state =
- pgf_new_parse_state(ps, ps->before->end_offset, BIND_HARD,
- item->inside_prob+item->conts->outside_prob);
+ pgf_new_parse_state(ps, ps->before->end_offset, BIND_HARD);
if (state != NULL) {
pgf_item_advance(item, ps->pool);
- gu_buf_heap_push(state->agenda, pgf_item_prob_order, &item);
+ pgf_parsing_push_item(state, item);
} else {
pgf_item_free(ps, item);
}
@@ -1600,11 +1595,10 @@ pgf_parsing_symbol(PgfParsing* ps, PgfItem* item, PgfSymbol sym)
if (ps->before->start_offset == ps->before->end_offset) {
if (ps->before->needs_bind) {
PgfParseState* state =
- pgf_new_parse_state(ps, ps->before->end_offset, BIND_HARD,
- item->inside_prob+item->conts->outside_prob);
+ pgf_new_parse_state(ps, ps->before->end_offset, BIND_HARD);
if (state != NULL) {
pgf_item_advance(item, ps->pool);
- gu_buf_heap_push(state->agenda, pgf_item_prob_order, &item);
+ pgf_parsing_push_item(state, item);
} else {
pgf_item_free(ps, item);
}
@@ -1613,7 +1607,7 @@ pgf_parsing_symbol(PgfParsing* ps, PgfItem* item, PgfSymbol sym)
}
} else {
pgf_item_advance(item, ps->pool);
- gu_buf_heap_push(ps->before->agenda, pgf_item_prob_order, &item);
+ pgf_parsing_push_item(ps->before, item);
}
break;
}
@@ -1628,84 +1622,11 @@ pgf_parsing_symbol(PgfParsing* ps, PgfItem* item, PgfSymbol sym)
}
}
-static void
-pgf_parsing_item(PgfParsing* ps, PgfItem* item)
-{
-#ifdef PGF_PARSER_DEBUG
- GuPool* tmp_pool = gu_new_pool();
- GuOut* out = gu_file_out(stderr, tmp_pool);
- GuExn* err = gu_exn(tmp_pool);
- pgf_print_item(item, ps->before, out, err, tmp_pool);
- gu_pool_free(tmp_pool);
-#endif
-
- GuVariantInfo i = gu_variant_open(item->prod);
- switch (i.tag) {
- case PGF_PRODUCTION_APPLY: {
- PgfProductionApply* papp = i.data;
- PgfCncFun* fun = papp->fun;
- PgfSymbols* syms = fun->lins[item->conts->lin_idx]->syms;
- if (item->sym_idx == gu_seq_length(syms)) {
- pgf_parsing_complete(ps, item, NULL);
- pgf_item_free(ps, item);
- } else {
- pgf_parsing_symbol(ps, item, item->curr_sym);
- }
- break;
- }
- case PGF_PRODUCTION_COERCE: {
- PgfProductionCoerce* pcoerce = i.data;
- switch (item->sym_idx) {
- case 0:
- if (pcoerce->coerce->prods == NULL) {
- // empty category
- pgf_item_free(ps, item);
- return;
- }
-
- pgf_parsing_td_predict(ps, item,
- pcoerce->coerce,
- item->conts->lin_idx);
- break;
- case 1:
- pgf_parsing_complete(ps, item, NULL);
- pgf_item_free(ps, item);
- break;
- default:
- gu_impossible();
- }
- break;
- }
- case PGF_PRODUCTION_EXTERN: {
- PgfProductionExtern* pext = i.data;
-
- PgfSymbols* syms;
- if (pext->lins != NULL &&
- (syms = gu_seq_get(pext->lins,PgfSymbols*,item->conts->lin_idx)) != NULL) {
- if (item->sym_idx == gu_seq_length(syms)) {
- pgf_parsing_complete(ps, item, NULL);
- pgf_item_free(ps, item);
- } else {
- PgfSymbol sym =
- gu_seq_get(syms, PgfSymbol, item->sym_idx);
- pgf_parsing_symbol(ps, item, sym);
- }
- } else {
- pgf_parsing_complete(ps, item, pext->ep);
- pgf_item_free(ps, item);
- }
- break;
- }
- default:
- gu_impossible();
- }
-}
-
static void
pgf_parsing_set_default_factors(PgfParsing* ps, PgfAbstr* abstr)
{
PgfFlag* flag;
-
+
flag =
gu_seq_binsearch(abstr->aflags, pgf_flag_order, PgfFlag, "heuristic_search_factor");
if (flag != NULL) {
@@ -1715,6 +1636,9 @@ pgf_parsing_set_default_factors(PgfParsing* ps, PgfAbstr* abstr)
}
}
+PGF_INTERNAL_DECL bool
+pgf_is_case_sensitive(PgfConcr* concr);
+
static PgfParsing*
pgf_new_parsing(PgfConcr* concr, GuString sentence,
PgfCallbacksMap* callbacks, PgfOracleCallback* oracle,
@@ -1725,8 +1649,7 @@ pgf_new_parsing(PgfConcr* concr, GuString sentence,
ps->pool = pool;
ps->out_pool = out_pool;
ps->sentence = sentence;
- ps->case_sensitive =
- (gu_seq_binsearch(concr->cflags, pgf_flag_order, PgfFlag, "case_sensitive") == NULL);
+ ps->case_sensitive = pgf_is_case_sensitive(concr);
ps->expr_queue = gu_new_buf(PgfExprState*, pool);
ps->max_fid = concr->total_cats;
ps->before = NULL;
@@ -1734,7 +1657,6 @@ pgf_new_parsing(PgfConcr* concr, GuString sentence,
#ifdef PGF_COUNTS_DEBUG
ps->item_full_count = 0;
ps->item_real_count = 0;
- ps->cont_full_count = 0;
ps->ccat_full_count = 0;
ps->prod_full_count = 0;
#endif
@@ -1754,10 +1676,9 @@ pgf_new_parsing(PgfConcr* concr, GuString sentence,
static void
pgf_parsing_print_counts(PgfParsing* ps)
{
- printf("%d\t%d\t%d\t%d\t%d\n",
+ printf("%d\t%d\t%d\t%d\n",
ps->item_full_count,
ps->item_real_count,
- ps->cont_full_count,
ps->ccat_full_count,
ps->prod_full_count);
}
@@ -1814,7 +1735,7 @@ pgf_result_production(PgfParsing* ps,
st->args = gu_empty_seq();
st->arg_idx = 0;
- pgf_result_predict(ps, st, ccat);
+ pgf_result_predict(ps, st, ccat, answers->outside_prob);
break;
}
case PGF_PRODUCTION_EXTERN: {
@@ -1836,21 +1757,16 @@ pgf_result_production(PgfParsing* ps,
static void
pgf_result_predict(PgfParsing* ps,
- PgfExprState* cont, PgfCCat* ccat)
+ PgfExprState* cont, PgfCCat* ccat,
+ prob_t outside_prob)
{
- prob_t outside_prob = 0;
- if (cont != NULL) {
- cont->ep.prob -= ccat->viterbi_prob;
- outside_prob =
- cont->answers->outside_prob+cont->ep.prob;
- }
-
PgfAnswers* answers = ccat->answers;
if (answers == NULL) {
answers = gu_new(PgfAnswers, ps->pool);
answers->conts = gu_new_buf(PgfExprState*, ps->pool);
answers->exprs = gu_new_buf(PgfExprProb*, ps->pool);
answers->outside_prob = outside_prob;
+ answers->ccat = ccat;
ccat->answers = answers;
}
@@ -1882,8 +1798,14 @@ pgf_result_predict(PgfParsing* ps,
.fun = cont->ep.expr,
.arg = ep->expr);
st->ep.prob = cont->ep.prob+ep->prob;
- st->args = cont->args;
- st->arg_idx = cont->arg_idx+1;
+
+ if (cont->arg_idx == PGF_EXPR_CHUNK_STATE) {
+ st->state = gu_map_get(cont->state->chunks_map, ccat, PgfParseState*);
+ st->arg_idx = PGF_EXPR_CHUNK_STATE;
+ } else {
+ st->args = cont->args;
+ st->arg_idx = cont->arg_idx+1;
+ }
gu_buf_heap_push(ps->expr_queue, &pgf_expr_state_order, &st);
}
@@ -1935,8 +1857,7 @@ pgf_parsing_init(PgfConcr* concr, PgfCId cat,
ps->heuristic_factor = heuristic_factor;
}
- PgfParseState* state =
- pgf_new_parse_state(ps, 0, BIND_SOFT, 0);
+ pgf_parsing_scan(ps);
int fidString = -1;
PgfCCat* start_ccat = gu_new(PgfCCat, ps->pool);
@@ -1945,6 +1866,7 @@ pgf_parsing_init(PgfConcr* concr, PgfCId cat,
start_ccat->linrefs = NULL;
start_ccat->viterbi_prob = 0;
start_ccat->fid = -5;
+ start_ccat->chunk_count = 1;
start_ccat->conts = NULL;
start_ccat->answers = NULL;
start_ccat->prods = NULL;
@@ -1953,17 +1875,13 @@ pgf_parsing_init(PgfConcr* concr, PgfCId cat,
gu_assert(start_ccat->cnccat != NULL);
#ifdef PGF_COUNTS_DEBUG
- state->ps->ccat_full_count++;
+ ps->ccat_full_count++;
#endif
PgfItemConts* conts =
- pgf_parsing_get_conts(state, start_ccat, 0, ps->pool);
+ pgf_parsing_get_conts(ps->before, start_ccat, 0, ps->pool);
gu_buf_push(conts->items, PgfItem*, NULL);
-#ifdef PGF_COUNTS_DEBUG
- ps->cont_full_count++;
-#endif
-
size_t n_ccats = gu_seq_length(cnccat->cats);
for (size_t i = 0; i < n_ccats; i++) {
PgfCCat* ccat = gu_seq_get(cnccat->cats, PgfCCat*, i);
@@ -1983,7 +1901,7 @@ pgf_parsing_init(PgfConcr* concr, PgfCId cat,
PgfItem* item = gu_new(PgfItem, ps->pool);
item->args = args;
- item->inside_prob += ccat->viterbi_prob;
+ item->inside_prob = 0;
item->conts = conts;
item->prod = prod;
item->curr_sym = gu_null_variant;
@@ -2000,7 +1918,7 @@ pgf_parsing_init(PgfConcr* concr, PgfCId cat,
ps->item_real_count++;
#endif
- gu_buf_heap_push(state->agenda, pgf_item_prob_order, &item);
+ gu_buf_heap_push(ps->before->agenda, pgf_item_prob_order, &item);
}
}
}
@@ -2018,10 +1936,18 @@ pgf_parsing_proceed(PgfParsing* ps)
best_prob = gu_buf_get(ps->expr_queue, PgfExprState*, 0)->ep.prob;
}
- prob_t delta_prob = 0;
- PgfParseState* st = ps->before;
+ PgfParseState* st = ps->before;
+ PgfParseState* last = NULL;
+ prob_t delta_prob = 0;
while (st != NULL) {
if (gu_buf_length(st->agenda) > 0) {
+ if (last != NULL) {
+ delta_prob +=
+ (last->viterbi_prob-st->viterbi_prob) *
+ ps->heuristic_factor;
+ }
+ last = st;
+
PgfItem* item = gu_buf_get(st->agenda, PgfItem*, 0);
prob_t item_prob =
item->inside_prob+item->conts->outside_prob+delta_prob;
@@ -2034,22 +1960,32 @@ pgf_parsing_proceed(PgfParsing* ps)
ps->after = ps->before;
ps->before = tmp;
}
-
+
has_progress = true;
}
}
- prob_t state_delta =
- (st->viterbi_prob-(st->next ? st->next->viterbi_prob : 0))*
- ps->heuristic_factor;
- delta_prob += state_delta;
st = st->next;
}
if (has_progress) {
PgfItem* item;
gu_buf_heap_pop(ps->before->agenda, pgf_item_prob_order, &item);
- pgf_parsing_item(ps, item);
+
+#ifdef PGF_PARSER_DEBUG
+ GuPool* tmp_pool = gu_new_pool();
+ GuOut* out = gu_file_out(stderr, tmp_pool);
+ GuExn* err = gu_exn(tmp_pool);
+ pgf_print_item(item, ps->before, out, err, tmp_pool);
+ gu_pool_free(tmp_pool);
+#endif
+
+ if (gu_variant_is_null(item->curr_sym)) {
+ pgf_parsing_complete(ps, item, NULL);
+ pgf_item_free(ps, item);
+ } else {
+ pgf_parsing_symbol(ps, item, item->curr_sym);
+ }
}
while (ps->after != NULL) {
@@ -2062,6 +1998,28 @@ pgf_parsing_proceed(PgfParsing* ps)
return has_progress;
}
+typedef struct {
+ GuMapItor fn;
+ PgfParsing* ps;
+ PgfExprState* st;
+} PgfChunkCatItor;
+
+static void
+pgf_iter_chunk_cat(GuMapItor* fn,
+ const void* key, void* value,
+ GuExn *err)
+{
+ PgfChunkCatItor* clo = (PgfChunkCatItor*) fn;
+ PgfCCat* ccat = (PgfCCat*) key;
+
+ prob_t outside_prob =
+ clo->st->answers->outside_prob+
+ clo->st->ep.prob+
+ ccat->cnccat->abscat->prob;
+
+ pgf_result_predict(clo->ps, clo->st, ccat, outside_prob);
+}
+
static PgfExprProb*
pgf_parse_result_next(PgfParsing* ps)
{
@@ -2084,11 +2042,28 @@ pgf_parse_result_next(PgfParsing* ps)
#endif
#endif
- if (st->arg_idx < gu_seq_length(st->args)) {
+ if (st->arg_idx == PGF_EXPR_CHUNK_STATE) {
+ // here we look for chunks
+
+ if (st->state == ps->before) {
+ if (pgf_parse_result_is_new(st)) {
+ gu_buf_push(st->answers->exprs, PgfExprProb*, &st->ep);
+ return &st->ep;
+ }
+ } else {
+ PgfChunkCatItor clo = { { pgf_iter_chunk_cat }, ps, st };
+ if (st->state->chunks_map != NULL)
+ gu_map_iter(st->state->chunks_map, &clo.fn, NULL);
+ }
+ } else if (st->arg_idx < gu_seq_length(st->args)) {
+ // here we handle normal unfinished expression states
+
PgfCCat* ccat =
gu_seq_index(st->args, PgfPArg, st->arg_idx)->ccat;
if (ccat->fid < ps->concr->total_cats) {
+			// when the argument was not used by the parser,
+			// we create a metavariable
PgfExpr meta = gu_new_variant_i(ps->out_pool,
PGF_EXPR_META, PgfExprMeta,
.id = 0);
@@ -2103,7 +2078,10 @@ pgf_parse_result_next(PgfParsing* ps)
st->arg_idx++;
gu_buf_heap_push(ps->expr_queue, &pgf_expr_state_order, &st);
} else {
- pgf_result_predict(ps, st, ccat);
+ prob_t outside_prob =
+ st->answers->outside_prob+
+ st->ep.prob-ccat->viterbi_prob;
+ pgf_result_predict(ps, st, ccat, outside_prob);
}
} else if (pgf_parse_result_is_new(st)) {
gu_buf_push(st->answers->exprs, PgfExprProb*, &st->ep);
@@ -2111,7 +2089,7 @@ pgf_parse_result_next(PgfParsing* ps)
size_t n_conts = gu_buf_length(st->answers->conts);
for (size_t i = 0; i < n_conts; i++) {
PgfExprState* st2 = gu_buf_get(st->answers->conts, PgfExprState*, i);
-
+
if (st2 == NULL) {
return &st->ep;
}
@@ -2125,9 +2103,17 @@ pgf_parse_result_next(PgfParsing* ps)
PGF_EXPR_APP, PgfExprApp,
.fun = st2->ep.expr,
.arg = st->ep.expr);
- st3->ep.prob = st2->ep.prob + st->ep.prob;
- st3->args = st2->args;
- st3->arg_idx = st2->arg_idx+1;
+ if (st2->arg_idx == PGF_EXPR_CHUNK_STATE) {
+ st3->ep.prob = st2->ep.prob+st->answers->ccat->cnccat->abscat->prob +
+ st->ep.prob;
+ st3->state = gu_map_get(st2->state->chunks_map, st->answers->ccat, PgfParseState*);
+ st3->arg_idx = PGF_EXPR_CHUNK_STATE;
+ } else {
+ st3->ep.prob = st2->ep.prob-st->answers->ccat->viterbi_prob +
+ st->ep.prob;
+ st3->args = st2->args;
+ st3->arg_idx = st2->arg_idx+1;
+ }
gu_buf_heap_push(ps->expr_queue, &pgf_expr_state_order, &st3);
}
@@ -2186,6 +2172,126 @@ pgf_parse(PgfConcr* concr, PgfType* typ, GuString sentence,
return pgf_parse_with_heuristics(concr, typ, sentence, -1.0, callbacks, err, pool, out_pool);
}
+static void
+pgf_iter_generated_cats(PgfParsing* ps, PgfParseState* next_state);
+
+static void
+pgf_process_generated_cat(PgfParsing* ps,
+ PgfParseState* state, PgfParseState* next_state,
+ PgfCCat* ccat)
+{
+ bool just_coercions = true;
+
+ PgfCCat* children[ccat->n_synprods];
+ for (size_t i = 0; i < ccat->n_synprods; i++) {
+ PgfProduction prod =
+ gu_seq_get(ccat->prods, PgfProduction, i);
+
+ children[i] = NULL;
+
+ GuVariantInfo inf = gu_variant_open(prod);
+ switch (inf.tag) {
+ case PGF_PRODUCTION_APPLY: {
+ PgfProductionApply* papp = inf.data;
+
+ size_t j = gu_seq_length(papp->args);
+ while (j > 0) {
+ PgfPArg* parg = gu_seq_index(papp->args, PgfPArg, j-1);
+
+ if (pgf_parsing_get_completed(state, parg->ccat->conts) != NULL &&
+ ccat->conts->state->end_offset == parg->ccat->conts->state->end_offset) {
+ children[i] = parg->ccat;
+ break;
+ }
+
+ j--;
+ }
+
+ if (children[i] == NULL) {
+ just_coercions = false;
+ break;
+ }
+ break;
+ }
+ case PGF_PRODUCTION_COERCE: {
+ PgfProductionCoerce* pcoerce = inf.data;
+ children[i] = pcoerce->coerce;
+ break;
+ }
+ }
+ }
+
+ if (just_coercions) {
+ ccat->chunk_count++;
+
+ for (size_t i = 0; i < ccat->n_synprods; i++) {
+ children[i]->chunk_count--;
+
+#ifdef PGF_PARSER_DEBUG
+ GuPool* tmp_pool = gu_new_pool();
+ GuOut* out = gu_file_out(stderr, tmp_pool);
+ GuExn* err = gu_exn(tmp_pool);
+ pgf_print_fid(children[i]->fid, out, err);
+ gu_printf(out, err, ".chunk_count=%d\n", children[i]->chunk_count);
+ gu_pool_free(tmp_pool);
+#endif
+
+ if (children[i]->chunk_count == 0) {
+ pgf_process_generated_cat(ps, state, next_state, children[i]);
+ }
+ }
+ } else {
+ PgfParseState* prev_state = ccat->conts->state;
+ if (prev_state->chunks_map == NULL) {
+ pgf_iter_generated_cats(ps, prev_state);
+
+ if (prev_state->chunks_map == NULL) {
+ prev_state->chunks_map =
+ gu_new_addr_map(PgfCCat*, PgfParseState*,
+ &gu_null_struct, ps->pool);
+ }
+ }
+
+#ifdef PGF_PARSER_DEBUG
+ GuPool* tmp_pool = gu_new_pool();
+ GuOut* out = gu_file_out(stderr, tmp_pool);
+ GuExn* err = gu_exn(tmp_pool);
+ gu_printf(out, err, "[%d - ", prev_state->end_offset);
+ pgf_print_fid(ccat->fid, out, err);
+ gu_printf(out, err, " - %d]\n", next_state->start_offset);
+ gu_pool_free(tmp_pool);
+#endif
+
+ gu_map_put(prev_state->chunks_map, ccat, PgfParseState*, next_state);
+ }
+}
+
+static void
+pgf_iter_generated_cats(PgfParsing* ps, PgfParseState* next_state)
+{
+ size_t count = 0;
+ PgfParseState* state = next_state;
+
+ for (;;) {
+ size_t i = 0;
+ PgfCCat* ccat;
+ PgfItemConts* conts;
+ while (gu_map_next(state->generated_cats, &i, (void**)&conts, &ccat)) {
+ if (ccat->chunk_count > 0)
+ continue;
+
+ count++;
+
+ pgf_process_generated_cat(ps, state, next_state, ccat);
+ }
+
+ if (count > 0 || state->next == NULL)
+ break;
+
+ state = state->next;
+ }
+}
+
PGF_API GuEnum*
pgf_parse_with_heuristics(PgfConcr* concr, PgfType* typ, GuString sentence,
double heuristics,
@@ -2217,7 +2323,34 @@ pgf_parse_with_heuristics(PgfConcr* concr, PgfType* typ, GuString sentence,
if (!pgf_parsing_proceed(ps)) {
GuExnData* exn = gu_raise(err, PgfParseError);
exn->data = (void*) pgf_parsing_new_exception(ps, exn->pool);
- return NULL;
+
+ PgfExprState* st = gu_new(PgfExprState, ps->pool);
+ st->answers = gu_new(PgfAnswers, ps->pool);
+ st->answers->conts = gu_new_buf(PgfExprState*, ps->pool);
+ st->answers->exprs = gu_new_buf(PgfExprProb*, ps->pool);
+ st->answers->ccat = NULL;
+ st->answers->outside_prob = 0;
+ st->ep.expr =
+ gu_new_variant_i(ps->out_pool,
+ PGF_EXPR_META, PgfExprMeta,
+ .id = 0);
+ st->ep.prob = 0;
+ st->state = NULL;
+ st->arg_idx = PGF_EXPR_CHUNK_STATE;
+
+ pgf_iter_generated_cats(ps, ps->before);
+
+ PgfParseState* state = ps->before;
+ while (state != NULL) {
+ if (state->chunks_map != NULL)
+ st->state = state;
+ state = state->next;
+ }
+
+ if (st->state != NULL) {
+ gu_buf_heap_push(ps->expr_queue, &pgf_expr_state_order, &st);
+ }
+ break;
}
#ifdef PGF_COUNTS_DEBUG
@@ -2340,172 +2473,6 @@ pgf_complete(PgfConcr* concr, PgfType* type, GuString sentence,
return &ps->en;
}
-static void
-pgf_morpho_iter(PgfProductionIdx* idx,
- PgfMorphoCallback* callback,
- GuExn* err)
-{
- size_t n_entries = gu_buf_length(idx);
- for (size_t i = 0; i < n_entries; i++) {
- PgfProductionIdxEntry* entry =
- gu_buf_index(idx, PgfProductionIdxEntry, i);
-
- PgfCId lemma = entry->papp->fun->absfun->name;
- GuString analysis = entry->ccat->cnccat->labels[entry->lin_idx];
-
- prob_t prob = entry->ccat->cnccat->abscat->prob +
- entry->papp->fun->absfun->ep.prob;
- callback->callback(callback,
- lemma, analysis, prob, err);
- if (!gu_ok(err))
- return;
- }
-}
-
-typedef struct {
- GuOrder order;
- bool case_sensitive;
-} PgfSequenceOrder;
-
-static int
-pgf_sequence_cmp_fn(GuOrder* order, const void* p1, const void* p2)
-{
- PgfSequenceOrder* self = gu_container(order, PgfSequenceOrder, order);
- GuString sent = (GuString) p1;
- const PgfSequence* sp2 = p2;
-
- int res = pgf_symbols_cmp(&sent, sp2->syms, self->case_sensitive);
- if (res == 0 && *sent != 0) {
- res = 1;
- }
-
- return res;
-}
-
-PGF_API void
-pgf_lookup_morpho(PgfConcr *concr, GuString sentence,
- PgfMorphoCallback* callback, GuExn* err)
-{
- if (concr->sequences == NULL) {
- GuExnData* err_data = gu_raise(err, PgfExn);
- if (err_data) {
- err_data->data = "The concrete syntax is not loaded";
- return;
- }
- }
-
- bool case_sensitive =
- (gu_seq_binsearch(concr->cflags, pgf_flag_order, PgfFlag, "case_sensitive") == NULL);
-
- PgfSequenceOrder order = { { pgf_sequence_cmp_fn }, case_sensitive };
- PgfSequence* seq = (PgfSequence*)
- gu_seq_binsearch(concr->sequences, &order.order,
- PgfSequence, (void*) sentence);
-
- if (seq != NULL && seq->idx != NULL)
- pgf_morpho_iter(seq->idx, callback, err);
-}
-
-typedef struct {
- GuEnum en;
- PgfSequences* sequences;
- GuString prefix;
- size_t seq_idx;
-} PgfFullFormState;
-
-struct PgfFullFormEntry {
- GuString tokens;
- PgfProductionIdx* idx;
-};
-
-static void
-gu_fullform_enum_next(GuEnum* self, void* to, GuPool* pool)
-{
- PgfFullFormState* st = gu_container(self, PgfFullFormState, en);
- PgfFullFormEntry* entry = NULL;
-
- if (st->sequences != NULL) {
- size_t n_seqs = gu_seq_length(st->sequences);
- while (st->seq_idx < n_seqs) {
- PgfSequence* seq = gu_seq_index(st->sequences, PgfSequence, st->seq_idx);
- GuString tokens = pgf_get_tokens(seq->syms, 0, pool);
-
- if (!gu_string_is_prefix(st->prefix, tokens)) {
- st->seq_idx = n_seqs;
- break;
- }
-
- if (*tokens != 0 && seq->idx != NULL) {
- entry = gu_new(PgfFullFormEntry, pool);
- entry->tokens = tokens;
- entry->idx = seq->idx;
-
- st->seq_idx++;
- break;
- }
-
- st->seq_idx++;
- }
- }
-
- *((PgfFullFormEntry**) to) = entry;
-}
-
-PGF_API GuEnum*
-pgf_fullform_lexicon(PgfConcr *concr, GuPool* pool)
-{
- PgfFullFormState* st = gu_new(PgfFullFormState, pool);
- st->en.next = gu_fullform_enum_next;
- st->sequences = concr->sequences;
- st->prefix = "";
- st->seq_idx = 0;
- return &st->en;
-}
-
-PGF_API GuString
-pgf_fullform_get_string(PgfFullFormEntry* entry)
-{
- return entry->tokens;
-}
-
-PGF_API void
-pgf_fullform_get_analyses(PgfFullFormEntry* entry,
- PgfMorphoCallback* callback, GuExn* err)
-{
- pgf_morpho_iter(entry->idx, callback, err);
-}
-
-PGF_API GuEnum*
-pgf_lookup_word_prefix(PgfConcr *concr, GuString prefix,
- GuPool* pool, GuExn* err)
-{
- if (concr->sequences == NULL) {
- GuExnData* err_data = gu_raise(err, PgfExn);
- if (err_data) {
- err_data->data = "The concrete syntax is not loaded";
- return NULL;
- }
- }
-
- PgfFullFormState* state = gu_new(PgfFullFormState, pool);
- state->en.next = gu_fullform_enum_next;
- state->sequences = concr->sequences;
- state->prefix = prefix;
- state->seq_idx = 0;
-
- bool case_sensitive =
- (gu_seq_binsearch(concr->cflags, pgf_flag_order, PgfFlag, "case_sensitive") == NULL);
-
- PgfSequenceOrder order = { { pgf_sequence_cmp_fn }, case_sensitive };
- if (!gu_seq_binsearch_index(concr->sequences, &order.order,
- PgfSequence, (void*) prefix,
- &state->seq_idx)) {
- state->seq_idx++;
- }
-
- return &state->en;
-}
-
PGF_API void
pgf_parser_index(PgfConcr* concr,
PgfCCat* ccat, PgfProduction prod,
diff --git a/src/runtime/c/pgf/pgf.h b/src/runtime/c/pgf/pgf.h
index ea4c97335..3d5f1f84f 100644
--- a/src/runtime/c/pgf/pgf.h
+++ b/src/runtime/c/pgf/pgf.h
@@ -162,6 +162,22 @@ PGF_API_DECL void
pgf_lookup_morpho(PgfConcr *concr, GuString sentence,
PgfMorphoCallback* callback, GuExn* err);
+typedef struct {
+ size_t pos;
+ GuString ptr;
+} PgfCohortSpot;
+
+typedef struct {
+ PgfCohortSpot start;
+ PgfCohortSpot end;
+ GuBuf* buf;
+} PgfCohortRange;
+
+PGF_API_DECL GuEnum*
+pgf_lookup_cohorts(PgfConcr *concr, GuString sentence,
+ PgfMorphoCallback* callback,
+ GuPool* pool, GuExn* err);
+
typedef struct PgfFullFormEntry PgfFullFormEntry;
PGF_API_DECL GuEnum*
diff --git a/src/runtime/c/pgf/printer.c b/src/runtime/c/pgf/printer.c
index 417d78e84..1843bcbdb 100644
--- a/src/runtime/c/pgf/printer.c
+++ b/src/runtime/c/pgf/printer.c
@@ -98,6 +98,74 @@ pgf_print_fid(int fid, GuOut* out, GuExn* err)
gu_printf(out, err, "C%d", fid);
}
+PGF_INTERNAL void
+pgf_print_production_args(PgfPArgs* args,
+ GuOut* out, GuExn* err)
+{
+ size_t n_args = gu_seq_length(args);
+ for (size_t j = 0; j < n_args; j++) {
+ if (j > 0)
+ gu_putc(',',out,err);
+
+ PgfPArg arg = gu_seq_get(args, PgfPArg, j);
+
+ if (arg.hypos != NULL &&
+ gu_seq_length(arg.hypos) > 0) {
+ size_t n_hypos = gu_seq_length(arg.hypos);
+ for (size_t k = 0; k < n_hypos; k++) {
+ PgfCCat *hypo = gu_seq_get(arg.hypos, PgfCCat*, k);
+ pgf_print_fid(hypo->fid, out, err);
+ gu_putc(' ',out,err);
+ }
+ gu_puts("-> ",out,err);
+ }
+
+ pgf_print_fid(arg.ccat->fid, out, err);
+ }
+}
+
+PGF_INTERNAL void
+pgf_print_production(int fid, PgfProduction prod,
+ GuOut *out, GuExn* err)
+{
+ pgf_print_fid(fid, out, err);
+ gu_puts(" -> ", out, err);
+
+ GuVariantInfo i = gu_variant_open(prod);
+ switch (i.tag) {
+ case PGF_PRODUCTION_APPLY: {
+ PgfProductionApply* papp = i.data;
+ gu_printf(out,err,"F%d(",papp->fun->funid);
+ if (papp->fun->ep != NULL) {
+ pgf_print_expr(papp->fun->ep->expr, NULL, 0, out, err);
+ } else {
+ PgfPArg* parg = gu_seq_index(papp->args, PgfPArg, 0);
+ gu_printf(out,err,"linref %s", parg->ccat->cnccat->abscat->name);
+ }
+ gu_printf(out,err,")[");
+ pgf_print_production_args(papp->args,out,err);
+ gu_printf(out,err,"]\n");
+ break;
+ }
+ case PGF_PRODUCTION_COERCE: {
+ PgfProductionCoerce* pcoerce = i.data;
+ gu_puts("_[",out,err);
+ pgf_print_fid(pcoerce->coerce->fid, out, err);
+ gu_puts("]\n",out,err);
+ break;
+ }
+ case PGF_PRODUCTION_EXTERN: {
+ PgfProductionExtern* pext = i.data;
+ gu_printf(out,err,"(");
+ pgf_print_expr(pext->ep->expr, NULL, 0, out, err);
+ gu_printf(out,err,")[]\n");
+ break;
+ }
+ default:
+ gu_impossible();
+ }
+}
+
static void
pgf_print_productions(GuMapItor* fn, const void* key, void* value,
GuExn* err)
@@ -111,48 +179,7 @@ pgf_print_productions(GuMapItor* fn, const void* key, void* value,
size_t n_prods = gu_seq_length(ccat->prods);
for (size_t i = 0; i < n_prods; i++) {
PgfProduction prod = gu_seq_get(ccat->prods, PgfProduction, i);
-
- gu_puts(" ", out, err);
- pgf_print_fid(fid, out, err);
- gu_puts(" -> ", out, err);
-
- GuVariantInfo i = gu_variant_open(prod);
- switch (i.tag) {
- case PGF_PRODUCTION_APPLY: {
- PgfProductionApply* papp = i.data;
- gu_printf(out,err,"F%d[",papp->fun->funid);
- size_t n_args = gu_seq_length(papp->args);
- for (size_t j = 0; j < n_args; j++) {
- if (j > 0)
- gu_putc(',',out,err);
-
- PgfPArg arg = gu_seq_get(papp->args, PgfPArg, j);
-
- if (arg.hypos != NULL) {
- size_t n_hypos = gu_seq_length(arg.hypos);
- for (size_t k = 0; k < n_hypos; k++) {
- if (k > 0)
- gu_putc(' ',out,err);
- PgfCCat *hypo = gu_seq_get(arg.hypos, PgfCCat*, k);
- pgf_print_fid(hypo->fid, out, err);
- }
- }
-
- pgf_print_fid(arg.ccat->fid, out, err);
- }
- gu_printf(out,err,"]\n");
- break;
- }
- case PGF_PRODUCTION_COERCE: {
- PgfProductionCoerce* pcoerce = i.data;
- gu_puts("_[", out, err);
- pgf_print_fid(pcoerce->coerce->fid, out, err);
- gu_puts("]\n", out, err);
- break;
- }
- default:
- gu_impossible();
- }
+ pgf_print_production(fid, prod, out, err);
}
}
}
diff --git a/src/runtime/c/pgf/reader.c b/src/runtime/c/pgf/reader.c
index 522d69b83..d6c87e3e0 100644
--- a/src/runtime/c/pgf/reader.c
+++ b/src/runtime/c/pgf/reader.c
@@ -328,16 +328,20 @@ pgf_read_patt(PgfReader* rdr)
uint8_t tag = pgf_read_tag(rdr);
switch (tag) {
case PGF_PATT_APP: {
+ PgfCId ctor = pgf_read_cid(rdr, rdr->opool);
+ gu_return_on_exn(rdr->err, gu_null_variant);
+
+ size_t n_args = pgf_read_len(rdr);
+ gu_return_on_exn(rdr->err, gu_null_variant);
+
PgfPattApp *papp =
- gu_new_variant(PGF_PATT_APP,
- PgfPattApp,
- &patt, rdr->opool);
- papp->ctor = pgf_read_cid(rdr, rdr->opool);
- gu_return_on_exn(rdr->err, gu_null_variant);
-
- papp->n_args = pgf_read_len(rdr);
- gu_return_on_exn(rdr->err, gu_null_variant);
-
+ gu_new_flex_variant(PGF_PATT_APP,
+ PgfPattApp,
+ args, n_args,
+ &patt, rdr->opool);
+ papp->ctor = ctor;
+ papp->n_args = n_args;
+
for (size_t i = 0; i < papp->n_args; i++) {
papp->args[i] = pgf_read_patt(rdr);
gu_return_on_exn(rdr->err, gu_null_variant);
@@ -840,6 +844,7 @@ pgf_read_fid(PgfReader* rdr, PgfConcr* concr)
ccat->prods = NULL;
ccat->viterbi_prob = 0;
ccat->fid = fid;
+ ccat->chunk_count = 1;
ccat->conts = NULL;
ccat->answers = NULL;
@@ -1077,6 +1082,7 @@ pgf_read_cnccat(PgfReader* rdr, PgfAbstr* abstr, PgfConcr* concr, PgfCId name)
ccat->prods = NULL;
ccat->viterbi_prob = 0;
ccat->fid = fid;
+ ccat->chunk_count = 1;
ccat->conts = NULL;
ccat->answers = NULL;
diff --git a/src/runtime/c/pgf/scanner.c b/src/runtime/c/pgf/scanner.c
new file mode 100644
index 000000000..e8de23afb
--- /dev/null
+++ b/src/runtime/c/pgf/scanner.c
@@ -0,0 +1,516 @@
+#include
+#include
+#include
+#include
+
+PGF_INTERNAL int
+cmp_string(PgfCohortSpot* spot, GuString tok,
+ bool case_sensitive)
+{
+ for (;;) {
+ GuUCS c2 = gu_utf8_decode((const uint8_t**) &tok);
+ if (c2 == 0)
+ return 0;
+
+ const uint8_t* p = (uint8_t*) spot->ptr;
+ GuUCS c1 = gu_utf8_decode(&p);
+ if (c1 == 0)
+ return -1;
+
+ if (!case_sensitive) {
+ c1 = gu_ucs_to_lower(c1);
+ c2 = gu_ucs_to_lower(c2);
+ }
+
+ if (c1 != c2)
+ return (c1-c2);
+
+ spot->ptr = (GuString) p;
+ spot->pos++;
+ }
+}
+
+PGF_INTERNAL bool
+skip_space(GuString* psent, size_t* ppos)
+{
+ const uint8_t* p = (uint8_t*) *psent;
+ if (!gu_ucs_is_space(gu_utf8_decode(&p)))
+ return false;
+
+ *psent = (GuString) p;
+ (*ppos)++;
+ return true;
+}
+
+PGF_INTERNAL int
+pgf_symbols_cmp(PgfCohortSpot* spot,
+ PgfSymbols* syms, size_t* sym_idx,
+ bool case_sensitive)
+{
+ size_t n_syms = gu_seq_length(syms);
+ while (*sym_idx < n_syms) {
+ PgfSymbol sym = gu_seq_get(syms, PgfSymbol, *sym_idx);
+
+ if (*sym_idx > 0) {
+ if (!skip_space(&spot->ptr,&spot->pos)) {
+ if (*spot->ptr == 0)
+ return -1;
+ return 1;
+ }
+
+ while (*spot->ptr != 0) {
+ if (!skip_space(&spot->ptr,&spot->pos))
+ break;
+ }
+ }
+
+ GuVariantInfo inf = gu_variant_open(sym);
+ switch (inf.tag) {
+ case PGF_SYMBOL_CAT:
+ case PGF_SYMBOL_LIT:
+ case PGF_SYMBOL_VAR: {
+ if (*spot->ptr == 0)
+ return -1;
+ return 1;
+ }
+ case PGF_SYMBOL_KS: {
+ PgfSymbolKS* pks = inf.data;
+ if (*spot->ptr == 0)
+ return -1;
+
+ int cmp = cmp_string(spot,pks->token, case_sensitive);
+ if (cmp != 0)
+ return cmp;
+ break;
+ }
+ case PGF_SYMBOL_KP:
+ case PGF_SYMBOL_BIND:
+ case PGF_SYMBOL_NE:
+ case PGF_SYMBOL_SOFT_BIND:
+ case PGF_SYMBOL_SOFT_SPACE:
+ case PGF_SYMBOL_CAPIT:
+ case PGF_SYMBOL_ALL_CAPIT: {
+ return -1;
+ }
+ default:
+ gu_impossible();
+ }
+
+ (*sym_idx)++;
+ }
+
+ return 0;
+}
+
+static void
+pgf_morpho_iter(PgfProductionIdx* idx,
+ PgfMorphoCallback* callback,
+ GuExn* err)
+{
+ size_t n_entries = gu_buf_length(idx);
+ for (size_t i = 0; i < n_entries; i++) {
+ PgfProductionIdxEntry* entry =
+ gu_buf_index(idx, PgfProductionIdxEntry, i);
+
+ PgfCId lemma = entry->papp->fun->absfun->name;
+ GuString analysis = entry->ccat->cnccat->labels[entry->lin_idx];
+
+ prob_t prob = entry->ccat->cnccat->abscat->prob +
+ entry->papp->fun->absfun->ep.prob;
+ callback->callback(callback,
+ lemma, analysis, prob, err);
+ if (!gu_ok(err))
+ return;
+ }
+}
+
+typedef struct {
+ GuOrder order;
+ bool case_sensitive;
+} PgfSequenceOrder;
+
+PGF_INTERNAL bool
+pgf_is_case_sensitive(PgfConcr* concr)
+{
+ PgfFlag* flag =
+ gu_seq_binsearch(concr->cflags, pgf_flag_order, PgfFlag, "case_sensitive");
+ if (flag != NULL) {
+ GuVariantInfo inf = gu_variant_open(flag->value);
+ if (inf.tag == PGF_LITERAL_STR) {
+ PgfLiteralStr* lstr = inf.data;
+ if (strcmp(lstr->val, "off") == 0)
+ return false;
+ }
+ }
+ return true;
+}
+
+static int
+pgf_sequence_cmp_fn(GuOrder* order, const void* p1, const void* p2)
+{
+ PgfSequenceOrder* self = gu_container(order, PgfSequenceOrder, order);
+
+ PgfCohortSpot spot = {0, (GuString) p1};
+
+ const PgfSequence* sp2 = p2;
+
+ size_t sym_idx = 0;
+ int res = pgf_symbols_cmp(&spot, sp2->syms, &sym_idx, self->case_sensitive);
+ if (res == 0 && (*spot.ptr != 0 || sym_idx != gu_seq_length(sp2->syms))) {
+ res = 1;
+ }
+
+ return res;
+}
+
+PGF_API void
+pgf_lookup_morpho(PgfConcr *concr, GuString sentence,
+ PgfMorphoCallback* callback, GuExn* err)
+{
+ if (concr->sequences == NULL) {
+ GuExnData* err_data = gu_raise(err, PgfExn);
+ if (err_data) {
+ err_data->data = "The concrete syntax is not loaded";
+ return;
+ }
+ }
+
+ size_t index = 0;
+ PgfSequenceOrder order = { { pgf_sequence_cmp_fn },
+ pgf_is_case_sensitive(concr) };
+ if (gu_seq_binsearch_index(concr->sequences, &order.order,
+ PgfSequence, (void*) sentence,
+ &index)) {
+ PgfSequence* seq = NULL;
+
+ /* If the match is case-insensitive then there might be more
+ * matches around the current index. We must check the neighbour
+	 * sequences for matches as well.
+ */
+
+ if (!order.case_sensitive) {
+ size_t i = index;
+ while (i > 0) {
+ seq = gu_seq_index(concr->sequences, PgfSequence, i-1);
+
+ size_t sym_idx = 0;
+ PgfCohortSpot spot = {0, sentence};
+ if (pgf_symbols_cmp(&spot, seq->syms, &sym_idx, order.case_sensitive) != 0) {
+ break;
+ }
+
+ if (seq->idx != NULL)
+ pgf_morpho_iter(seq->idx, callback, err);
+
+ i--;
+ }
+ }
+
+ seq = gu_seq_index(concr->sequences, PgfSequence, index);
+ if (seq->idx != NULL)
+ pgf_morpho_iter(seq->idx, callback, err);
+
+ if (!order.case_sensitive) {
+ size_t i = index+1;
+ while (i < gu_seq_length(concr->sequences)) {
+ seq = gu_seq_index(concr->sequences, PgfSequence, i);
+
+ size_t sym_idx = 0;
+ PgfCohortSpot spot = {0, sentence};
+ if (pgf_symbols_cmp(&spot, seq->syms, &sym_idx, order.case_sensitive) != 0) {
+ break;
+ }
+
+ if (seq->idx != NULL)
+ pgf_morpho_iter(seq->idx, callback, err);
+
+ i++;
+ }
+ }
+ }
+}
+
+typedef struct {
+ GuEnum en;
+ PgfConcr* concr;
+ GuString sentence;
+ GuString current;
+ size_t len;
+ PgfMorphoCallback* callback;
+ GuExn* err;
+ bool case_sensitive;
+ GuBuf* spots;
+ GuBuf* found;
+} PgfCohortsState;
+
+static int
+cmp_cohort_spot(GuOrder* self, const void* a, const void* b)
+{
+ PgfCohortSpot *s1 = (PgfCohortSpot *) a;
+ PgfCohortSpot *s2 = (PgfCohortSpot *) b;
+
+ return (s1->ptr-s2->ptr);
+}
+
+static GuOrder
+pgf_cohort_spot_order[1] = {{ cmp_cohort_spot }};
+
+static void
+pgf_lookup_cohorts_helper(PgfCohortsState *state, PgfCohortSpot* spot,
+ int i, int j, ptrdiff_t min, ptrdiff_t max)
+{
+ // This is a variation of a binary search algorithm which
+ // can retrieve all prefixes of a string with minimal
+	// comparisons, i.e. there is no need to look up every
+ // prefix separately.
+
+ while (i <= j) {
+ int k = (i+j) / 2;
+ PgfSequence* seq = gu_seq_index(state->concr->sequences, PgfSequence, k);
+
+ PgfCohortSpot current = *spot;
+
+ size_t sym_idx = 0;
+ int cmp = pgf_symbols_cmp(¤t, seq->syms, &sym_idx, state->case_sensitive);
+ if (cmp < 0) {
+ j = k-1;
+ } else if (cmp > 0) {
+ ptrdiff_t len = current.ptr - spot->ptr;
+
+ if (min <= len)
+ pgf_lookup_cohorts_helper(state, spot, i, k-1, min, len);
+
+ if (len+1 <= max)
+ pgf_lookup_cohorts_helper(state, spot, k+1, j, len+1, max);
+
+ break;
+ } else {
+ ptrdiff_t len = current.ptr - spot->ptr;
+
+ if (min <= len)
+ pgf_lookup_cohorts_helper(state, spot, i, k-1, min, len);
+
+ if (seq->idx != NULL && gu_buf_length(seq->idx) > 0) {
+ PgfCohortRange* range = gu_buf_insert(state->found, 0);
+ range->start = *spot;
+ range->end = current;
+ range->buf = seq->idx;
+ }
+
+ while (*current.ptr != 0) {
+ if (!skip_space(¤t.ptr, ¤t.pos))
+ break;
+ }
+
+ gu_buf_heap_push(state->spots, pgf_cohort_spot_order, ¤t);
+
+ if (len <= max)
+ pgf_lookup_cohorts_helper(state, spot, k+1, j, len, max);
+
+ break;
+ }
+ }
+}
+
+static void
+pgf_lookup_cohorts_enum_next(GuEnum* self, void* to, GuPool* pool)
+{
+ PgfCohortsState* state = gu_container(self, PgfCohortsState, en);
+
+ while (gu_buf_length(state->found) == 0 &&
+ gu_buf_length(state->spots) > 0) {
+ PgfCohortSpot spot;
+ gu_buf_heap_pop(state->spots, pgf_cohort_spot_order, &spot);
+
+ if (spot.ptr == state->current)
+ continue;
+
+ if (*spot.ptr == 0)
+ break;
+
+ pgf_lookup_cohorts_helper
+ (state, &spot,
+ 0, gu_seq_length(state->concr->sequences)-1,
+ 1, (state->sentence+state->len)-spot.ptr);
+
+ if (gu_buf_length(state->found) == 0) {
+ // skip one character and try again
+ gu_utf8_decode((const uint8_t**) &spot.ptr);
+ spot.pos++;
+ gu_buf_heap_push(state->spots, pgf_cohort_spot_order, &spot);
+ }
+ }
+
+ PgfCohortRange* pRes = (PgfCohortRange*)to;
+
+ if (gu_buf_length(state->found) == 0) {
+ pRes->start.pos = 0;
+ pRes->start.ptr = NULL;
+ pRes->end.pos = 0;
+ pRes->end.ptr = NULL;
+ pRes->buf = NULL;
+ state->current = NULL;
+ return;
+ } else do {
+ *pRes = gu_buf_pop(state->found, PgfCohortRange);
+ state->current = pRes->start.ptr;
+ pgf_morpho_iter(pRes->buf, state->callback, state->err);
+ } while (gu_buf_length(state->found) > 0 &&
+ gu_buf_index_last(state->found, PgfCohortRange)->end.ptr == pRes->end.ptr);
+
+}
+
+PGF_API GuEnum*
+pgf_lookup_cohorts(PgfConcr *concr, GuString sentence,
+ PgfMorphoCallback* callback,
+ GuPool* pool, GuExn* err)
+{
+ if (concr->sequences == NULL) {
+ GuExnData* err_data = gu_raise(err, PgfExn);
+ if (err_data) {
+ err_data->data = "The concrete syntax is not loaded";
+ return NULL;
+ }
+ }
+
+ PgfCohortsState* state = gu_new(PgfCohortsState, pool);
+ state->en.next = pgf_lookup_cohorts_enum_next;
+ state->concr = concr;
+ state->sentence= sentence;
+ state->len = strlen(sentence);
+ state->callback= callback;
+ state->err = err;
+ state->case_sensitive = pgf_is_case_sensitive(concr);
+ state->spots = gu_new_buf(PgfCohortSpot, pool);
+ state->found = gu_new_buf(PgfCohortRange, pool);
+
+ PgfCohortSpot spot = {0,sentence};
+ while (*spot.ptr != 0) {
+ if (!skip_space(&spot.ptr, &spot.pos))
+ break;
+ }
+
+ gu_buf_heap_push(state->spots, pgf_cohort_spot_order, &spot);
+
+ return &state->en;
+}
+
+typedef struct {
+ GuEnum en;
+ PgfSequences* sequences;
+ GuString prefix;
+ size_t seq_idx;
+ bool case_sensitive;
+} PgfFullFormState;
+
+struct PgfFullFormEntry {
+ GuString tokens;
+ PgfProductionIdx* idx;
+};
+
+static void
+gu_fullform_enum_next(GuEnum* self, void* to, GuPool* pool)
+{
+ PgfFullFormState* st = gu_container(self, PgfFullFormState, en);
+ PgfFullFormEntry* entry = NULL;
+
+ if (st->sequences != NULL) {
+ size_t n_seqs = gu_seq_length(st->sequences);
+ while (st->seq_idx < n_seqs) {
+ PgfSequence* seq = gu_seq_index(st->sequences, PgfSequence, st->seq_idx);
+ GuString tokens = pgf_get_tokens(seq->syms, 0, pool);
+
+ PgfCohortSpot spot = {0, st->prefix};
+ if (cmp_string(&spot, tokens, st->case_sensitive) > 0 || *spot.ptr != 0) {
+ st->seq_idx = n_seqs;
+ break;
+ }
+
+ if (*tokens != 0 && seq->idx != NULL) {
+ entry = gu_new(PgfFullFormEntry, pool);
+ entry->tokens = tokens;
+ entry->idx = seq->idx;
+
+ st->seq_idx++;
+ break;
+ }
+
+ st->seq_idx++;
+ }
+ }
+
+ *((PgfFullFormEntry**) to) = entry;
+}
+
+PGF_API GuEnum*
+pgf_fullform_lexicon(PgfConcr *concr, GuPool* pool)
+{
+ PgfFullFormState* st = gu_new(PgfFullFormState, pool);
+ st->en.next = gu_fullform_enum_next;
+ st->sequences = concr->sequences;
+ st->prefix = "";
+ st->seq_idx = 0;
+ st->case_sensitive = true;
+ return &st->en;
+}
+
+PGF_API GuString
+pgf_fullform_get_string(PgfFullFormEntry* entry)
+{
+ return entry->tokens;
+}
+
+PGF_API void
+pgf_fullform_get_analyses(PgfFullFormEntry* entry,
+ PgfMorphoCallback* callback, GuExn* err)
+{
+ pgf_morpho_iter(entry->idx, callback, err);
+}
+
+PGF_API GuEnum*
+pgf_lookup_word_prefix(PgfConcr *concr, GuString prefix,
+ GuPool* pool, GuExn* err)
+{
+ if (concr->sequences == NULL) {
+ GuExnData* err_data = gu_raise(err, PgfExn);
+ if (err_data) {
+ err_data->data = "The concrete syntax is not loaded";
+ return NULL;
+ }
+ }
+
+ PgfFullFormState* state = gu_new(PgfFullFormState, pool);
+ state->en.next = gu_fullform_enum_next;
+ state->sequences = concr->sequences;
+ state->prefix = prefix;
+ state->seq_idx = 0;
+ state->case_sensitive = pgf_is_case_sensitive(concr);
+
+ PgfSequenceOrder order = { { pgf_sequence_cmp_fn },
+ state->case_sensitive };
+ if (!gu_seq_binsearch_index(concr->sequences, &order.order,
+ PgfSequence, (void*) prefix,
+ &state->seq_idx)) {
+ state->seq_idx++;
+ } else if (!state->case_sensitive) {
+ /* If the match is case-insensitive then there might be more
+ * matches around the current index. Since we scroll down
+ * anyway, it is enough to search upwards now.
+ */
+
+ while (state->seq_idx > 0) {
+ PgfSequence* seq =
+ gu_seq_index(concr->sequences, PgfSequence, state->seq_idx-1);
+
+ size_t sym_idx = 0;
+ PgfCohortSpot spot = {0, state->prefix};
+ if (pgf_symbols_cmp(&spot, seq->syms, &sym_idx, state->case_sensitive) > 0 || *spot.ptr != 0) {
+ break;
+ }
+
+ state->seq_idx--;
+ }
+ }
+
+ return &state->en;
+}
diff --git a/src/runtime/c/sg/sg.c b/src/runtime/c/sg/sg.c
index bcb97f55e..b5a473b99 100644
--- a/src/runtime/c/sg/sg.c
+++ b/src/runtime/c/sg/sg.c
@@ -499,14 +499,17 @@ store_expr(SgSG* sg,
PgfExprLit* elit = ei.data;
Mem mem[2];
+ size_t len = 0;
GuVariantInfo li = gu_variant_open(elit->lit);
switch (li.tag) {
case PGF_LITERAL_STR: {
PgfLiteralStr* lstr = li.data;
+ len = strlen(lstr->val);
+
mem[0].flags = MEM_Str;
- mem[0].n = strlen(lstr->val);
+ mem[0].n = len;
mem[0].z = lstr->val;
break;
}
@@ -515,6 +518,7 @@ store_expr(SgSG* sg,
mem[0].flags = MEM_Int;
mem[0].u.i = lint->val;
+ len = sizeof(mem[0].u.i);
break;
}
case PGF_LITERAL_FLT: {
@@ -522,6 +526,7 @@ store_expr(SgSG* sg,
mem[0].flags = MEM_Real;
mem[0].u.r = lflt->val;
+ len = sizeof(mem[0].u.r);
break;
}
default:
@@ -556,7 +561,7 @@ store_expr(SgSG* sg,
int serial_type_arg = sqlite3BtreeSerialType(&mem[1], file_format);
int serial_type_arg_hdr_len = sqlite3BtreeVarintLen(serial_type_arg);
- unsigned char* buf = malloc(1+serial_type_lit_hdr_len+(serial_type_arg_hdr_len > 1 ? serial_type_arg_hdr_len : 1)+mem[0].n+8);
+ unsigned char* buf = malloc(1+serial_type_lit_hdr_len+(serial_type_arg_hdr_len > 1 ? serial_type_arg_hdr_len : 1)+len+8);
unsigned char* p = buf;
*p++ = 1+serial_type_lit_hdr_len+serial_type_arg_hdr_len;
p += putVarint32(p, serial_type_lit);
diff --git a/src/runtime/c/sg/sqlite3Btree.c b/src/runtime/c/sg/sqlite3Btree.c
index 999606791..7a0683009 100644
--- a/src/runtime/c/sg/sqlite3Btree.c
+++ b/src/runtime/c/sg/sqlite3Btree.c
@@ -4835,7 +4835,6 @@ SQLITE_PRIVATE int sqlite3HeaderSizeBtree(void);
SQLITE_PRIVATE void sqlite3VdbeRecordUnpack(KeyInfo*,int,const void*,UnpackedRecord*);
SQLITE_PRIVATE UnpackedRecord *sqlite3VdbeAllocUnpackedRecord(KeyInfo *, char *, int, char **);
-typedef int (*RecordCompare)(int,const void*,UnpackedRecord*);
SQLITE_PRIVATE RecordCompare sqlite3VdbeFindCompare(UnpackedRecord*);
/************** End of btreeInt.h ********************************************/
diff --git a/src/runtime/haskell-bind/PGF2.hsc b/src/runtime/haskell-bind/PGF2.hsc
index dd03c3f3b..38ef3a07d 100644
--- a/src/runtime/haskell-bind/PGF2.hsc
+++ b/src/runtime/haskell-bind/PGF2.hsc
@@ -66,7 +66,7 @@ module PGF2 (-- * PGF
-- ** Generation
generateAll,
-- ** Morphological Analysis
- MorphoAnalysis, lookupMorpho, fullFormLexicon,
+ MorphoAnalysis, lookupMorpho, lookupCohorts, fullFormLexicon,
-- ** Visualizations
GraphvizOptions(..), graphvizDefaults,
graphvizAbstractTree, graphvizParseTree,
@@ -168,8 +168,6 @@ showPGF p =
languages :: PGF -> Map.Map ConcName Concr
languages p = langs p
--- | The abstract language name is the name of the top-level
--- abstract module
concreteName :: Concr -> ConcName
concreteName c = unsafePerformIO (peekUtf8CString =<< pgf_concrete_name (concr c))
@@ -893,8 +891,23 @@ newGraphvizOptions pool opts = do
-- Functions using Concr
-- Morpho analyses, parsing & linearization
-type MorphoAnalysis = (Fun,Cat,Float)
+-- | This triple is returned by all functions that deal with
+-- the grammar's lexicon. Its first element is the name of an abstract
+-- lexical function which can produce a given word or
+-- a multiword expression (i.e. this is the lemma).
+-- After that follows a string which describes
+-- the particular inflection form.
+--
+-- The last element is the logarithm of
+-- the probability of the function. The probability is not
+-- conditioned on the category of the function. This makes it
+-- possible to compare the likelihood of two functions even if they
+-- have different types.
+type MorphoAnalysis = (Fun,String,Float)
+-- | 'lookupMorpho' takes a string which must be a single word or
+-- a multiword expression. It then computes the list of all possible
+-- morphological analyses.
lookupMorpho :: Concr -> String -> [MorphoAnalysis]
lookupMorpho (Concr concr master) sent =
unsafePerformIO $
@@ -908,6 +921,45 @@ lookupMorpho (Concr concr master) sent =
freeHaskellFunPtr fptr
readIORef ref
+-- | 'lookupCohorts' takes an arbitrary string and produces
+-- a list of all places where lexical items from the grammar have been
+-- identified (i.e. cohorts). The list consists of triples of the format @(start,ans,end)@,
+-- where @start-end@ identifies the span in the text and @ans@ is
+-- the list of possible morphological analyses similar to 'lookupMorpho'.
+--
+-- The list is sorted first by the @start@ position and after that
+-- by the @end@ position. This can be used for instance if you want to
+-- filter only the longest matches.
+lookupCohorts :: Concr -> String -> [(Int,[MorphoAnalysis],Int)]
+lookupCohorts lang@(Concr concr master) sent =
+ unsafePerformIO $
+ do pl <- gu_new_pool
+ ref <- newIORef []
+ cback <- gu_malloc pl (#size PgfMorphoCallback)
+ fptr <- wrapLookupMorphoCallback (getAnalysis ref)
+ (#poke PgfMorphoCallback, callback) cback fptr
+ c_sent <- newUtf8CString sent pl
+ enum <- pgf_lookup_cohorts concr c_sent cback pl nullPtr
+ fpl <- newForeignPtr gu_pool_finalizer pl
+ fromCohortRange enum fpl fptr ref
+ where
+ fromCohortRange enum fpl fptr ref =
+ allocaBytes (#size PgfCohortRange) $ \ptr ->
+ withForeignPtr fpl $ \pl ->
+ do gu_enum_next enum ptr pl
+ buf <- (#peek PgfCohortRange, buf) ptr
+ if buf == nullPtr
+ then do finalizeForeignPtr fpl
+ freeHaskellFunPtr fptr
+ touchConcr lang
+ return []
+ else do start <- (#peek PgfCohortRange, start.pos) ptr
+ end <- (#peek PgfCohortRange, end.pos) ptr
+ ans <- readIORef ref
+ writeIORef ref []
+ cohs <- unsafeInterleaveIO (fromCohortRange enum fpl fptr ref)
+ return ((start,ans,end):cohs)
+
fullFormLexicon :: Concr -> [(String, [MorphoAnalysis])]
fullFormLexicon lang =
unsafePerformIO $
@@ -1393,11 +1445,13 @@ bracketedLinearize lang e = unsafePerformIO $
end_phrase ref _ c_cat c_fid c_lindex c_fun = do
(bs':stack,bs) <- readIORef ref
- cat <- peekUtf8CString c_cat
- let fid = fromIntegral c_fid
- let lindex = fromIntegral c_lindex
- fun <- peekUtf8CString c_fun
- writeIORef ref (stack, Bracket cat fid lindex fun (reverse bs) : bs')
+ if null bs
+ then writeIORef ref (stack, bs')
+ else do cat <- peekUtf8CString c_cat
+ let fid = fromIntegral c_fid
+ let lindex = fromIntegral c_lindex
+ fun <- peekUtf8CString c_fun
+ writeIORef ref (stack, Bracket cat fid lindex fun (reverse bs) : bs')
symbol_ne exn _ = do
gu_exn_raise exn gu_exn_type_PgfLinNonExist
diff --git a/src/runtime/haskell-bind/PGF2/Expr.hsc b/src/runtime/haskell-bind/PGF2/Expr.hsc
index 10db1291a..203f3ab94 100644
--- a/src/runtime/haskell-bind/PGF2/Expr.hsc
+++ b/src/runtime/haskell-bind/PGF2/Expr.hsc
@@ -6,7 +6,9 @@ import System.IO.Unsafe(unsafePerformIO)
import Foreign hiding (unsafePerformIO)
import Foreign.C
import Data.IORef
+import Data.Data
import PGF2.FFI
+import Data.Maybe(fromJust)
type Cat = String -- ^ Name of syntactic category
type Fun = String -- ^ Name of function
@@ -36,6 +38,20 @@ instance Eq Expr where
e1_touch >> e2_touch
return (res /= 0)
+instance Data Expr where
+ gfoldl f z e = z (fromJust . readExpr) `f` (showExpr [] e)
+ toConstr _ = readExprConstr
+ gunfold k z c = case constrIndex c of
+ 1 -> k (z (fromJust . readExpr))
+ _ -> error "gunfold"
+ dataTypeOf _ = exprDataType
+
+readExprConstr :: Constr
+readExprConstr = mkConstr exprDataType "(fromJust . readExpr)" [] Prefix
+
+exprDataType :: DataType
+exprDataType = mkDataType "PGF2.Expr" [readExprConstr]
+
-- | Constructs an expression by lambda abstraction
mkAbs :: BindType -> String -> Expr -> Expr
mkAbs bind_type var (Expr body bodyTouch) =
diff --git a/src/runtime/haskell-bind/PGF2/FFI.hsc b/src/runtime/haskell-bind/PGF2/FFI.hsc
index f5a30b006..e99d200db 100644
--- a/src/runtime/haskell-bind/PGF2/FFI.hsc
+++ b/src/runtime/haskell-bind/PGF2/FFI.hsc
@@ -100,7 +100,7 @@ foreign import ccall unsafe "gu/string.h gu_string_buf_out"
foreign import ccall unsafe "gu/file.h gu_file_in"
gu_file_in :: Ptr () -> Ptr GuPool -> IO (Ptr GuIn)
-foreign import ccall unsafe "gu/enum.h gu_enum_next"
+foreign import ccall safe "gu/enum.h gu_enum_next"
gu_enum_next :: Ptr a -> Ptr (Ptr b) -> Ptr GuPool -> IO ()
foreign import ccall unsafe "gu/string.h gu_string_buf_freeze"
@@ -409,6 +409,9 @@ foreign import ccall "pgf/pgf.h pgf_parse_with_oracle"
foreign import ccall "pgf/pgf.h pgf_lookup_morpho"
pgf_lookup_morpho :: Ptr PgfConcr -> CString -> Ptr PgfMorphoCallback -> Ptr GuExn -> IO ()
+foreign import ccall "pgf/pgf.h pgf_lookup_cohorts"
+ pgf_lookup_cohorts :: Ptr PgfConcr -> CString -> Ptr PgfMorphoCallback -> Ptr GuPool -> Ptr GuExn -> IO (Ptr GuEnum)
+
type LookupMorphoCallback = Ptr PgfMorphoCallback -> CString -> CString -> Float -> Ptr GuExn -> IO ()
foreign import ccall "wrapper"
diff --git a/src/runtime/haskell-bind/PGF2/Internal.hsc b/src/runtime/haskell-bind/PGF2/Internal.hsc
index d22df08b3..e8f0b5581 100644
--- a/src/runtime/haskell-bind/PGF2/Internal.hsc
+++ b/src/runtime/haskell-bind/PGF2/Internal.hsc
@@ -16,6 +16,9 @@ module PGF2.Internal(-- * Access the internal structures
eAbs, eApp, eMeta, eFun, eVar, eLit, eTyped, eImplArg, dTyp, hypo,
AbstrInfo, newAbstr, ConcrInfo, newConcr, newPGF,
+ -- * Expose PGF and Concr for FFI with C
+ PGF(..), Concr(..),
+
-- * Write an in-memory PGF to a file
unionPGF, writePGF, writeConcr,
@@ -592,17 +595,17 @@ newAbstr aflags cats funs = unsafePerformIO $ do
data ConcrInfo = ConcrInfo (Ptr GuSeq) (Ptr GuMap) (Ptr GuMap) (Ptr GuSeq) (Ptr GuSeq) (Ptr GuMap) (Ptr PgfConcr -> Ptr GuPool -> IO ()) CInt
-newConcr :: (?builder :: Builder s) => B s AbstrInfo ->
- [(String,Literal)] -> -- ^ Concrete syntax flags
- [(String,String)] -> -- ^ Printnames
- [(FId,[FunId])] -> -- ^ Lindefs
- [(FId,[FunId])] -> -- ^ Linrefs
- [(FId,[Production])] -> -- ^ Productions
- [(Fun,[SeqId])] -> -- ^ Concrete functions (must be sorted by Fun)
- [[Symbol]] -> -- ^ Sequences (must be sorted)
- [(Cat,FId,FId,[String])] -> -- ^ Concrete categories
- FId -> -- ^ The total count of the categories
- B s ConcrInfo
+newConcr :: (?builder :: Builder s) => B s AbstrInfo
+ -> [(String,Literal)] -- ^ Concrete syntax flags
+ -> [(String,String)] -- ^ Printnames
+ -> [(FId,[FunId])] -- ^ Lindefs
+ -> [(FId,[FunId])] -- ^ Linrefs
+ -> [(FId,[Production])] -- ^ Productions
+ -> [(Fun,[SeqId])] -- ^ Concrete functions (must be sorted by Fun)
+ -> [[Symbol]] -- ^ Sequences (must be sorted)
+ -> [(Cat,FId,FId,[String])] -- ^ Concrete categories
+ -> FId -- ^ The total count of the categories
+ -> B s ConcrInfo
newConcr (B (AbstrInfo _ _ abscats _ absfuns c_abs_lin_fun c_non_lexical_buf _)) cflags printnames lindefs linrefs prods cncfuns sequences cnccats total_cats = unsafePerformIO $ do
c_cflags <- newFlags cflags pool
c_printname <- newMap (#size GuString) gu_string_hasher newUtf8CString
diff --git a/src/runtime/haskell-bind/utils.c b/src/runtime/haskell-bind/utils.c
index 5afb33b5c..91d62ea56 100644
--- a/src/runtime/haskell-bind/utils.c
+++ b/src/runtime/haskell-bind/utils.c
@@ -100,7 +100,7 @@ hspgf_predict_callback(PgfOracleCallback* self,
size_t offset)
{
HSPgfOracleCallback* oracle = gu_container(self, HSPgfOracleCallback, oracle);
- oracle->predict(cat,label,hspgf_offset2hs(oracle->sentence, offset));
+ return oracle->predict(cat,label,hspgf_offset2hs(oracle->sentence, offset));
}
static bool
@@ -110,7 +110,7 @@ hspgf_complete_callback(PgfOracleCallback* self,
size_t offset)
{
HSPgfOracleCallback* oracle = gu_container(self, HSPgfOracleCallback, oracle);
- oracle->complete(cat,label,hspgf_offset2hs(oracle->sentence, offset));
+ return oracle->complete(cat,label,hspgf_offset2hs(oracle->sentence, offset));
}
static PgfExprProb*
diff --git a/src/runtime/haskell/PGF.hs b/src/runtime/haskell/PGF.hs
index 235d662d5..c47212b11 100644
--- a/src/runtime/haskell/PGF.hs
+++ b/src/runtime/haskell/PGF.hs
@@ -371,7 +371,7 @@ browse pgf id = fmap (\def -> (def,producers,consumers)) definition
Just (hyps,_,_) -> Just $ render (text "cat" <+> ppCId id <+> hsep (snd (mapAccumL (ppHypo 4) [] hyps)))
Nothing -> Nothing
- (producers,consumers) = Map.foldWithKey accum ([],[]) (funs (abstract pgf))
+ (producers,consumers) = Map.foldrWithKey accum ([],[]) (funs (abstract pgf))
where
accum f (ty,_,_,_) (plist,clist) =
let !plist' = if id `elem` ps then f : plist else plist
diff --git a/src/runtime/haskell/PGF/Forest.hs b/src/runtime/haskell/PGF/Forest.hs
index 2a680b7c9..ee15e2cf9 100644
--- a/src/runtime/haskell/PGF/Forest.hs
+++ b/src/runtime/haskell/PGF/Forest.hs
@@ -58,8 +58,8 @@ bracketedTokn :: Maybe Int -> Forest -> BracketedTokn
bracketedTokn dp f@(Forest abs cnc forest root) =
case [computeSeq isTrusted seq (map (render forest) args) | (seq,args) <- root] of
([bs@(Bracket_{})]:_) -> bs
- (bss:_) -> Bracket_ wildCId 0 0 wildCId [] bss
- [] -> Bracket_ wildCId 0 0 wildCId [] []
+ (bss:_) -> Bracket_ wildCId 0 0 0 wildCId [] bss
+ [] -> Bracket_ wildCId 0 0 0 wildCId [] []
where
isTrusted (_,fid) = IntSet.member fid trusted
@@ -190,7 +190,7 @@ foldForest :: (FunId -> [PArg] -> b -> b) -> (Expr -> [String] -> b -> b) -> b -
foldForest f g b fcat forest =
case IntMap.lookup fcat forest of
Nothing -> b
- Just set -> Set.fold foldProd b set
+ Just set -> Set.foldr foldProd b set
where
foldProd (PCoerce fcat) b = foldForest f g b fcat forest
foldProd (PApply funid args) b = f funid args b
diff --git a/src/runtime/haskell/PGF/Haskell.hs b/src/runtime/haskell/PGF/Haskell.hs
index 87cecc5d4..8b99a61e1 100644
--- a/src/runtime/haskell/PGF/Haskell.hs
+++ b/src/runtime/haskell/PGF/Haskell.hs
@@ -33,6 +33,7 @@ fromStr = from False id
from space cap ts =
case ts of
[] -> []
+ TK "":ts -> from space cap ts
TK s:ts -> put s++from True cap ts
BIND:ts -> from False cap ts
SOFT_BIND:ts -> from False cap ts
diff --git a/src/runtime/haskell/PGF/Macros.hs b/src/runtime/haskell/PGF/Macros.hs
index de175616c..96f9f3535 100644
--- a/src/runtime/haskell/PGF/Macros.hs
+++ b/src/runtime/haskell/PGF/Macros.hs
@@ -137,7 +137,7 @@ cidVar = mkCId "__gfVar"
-- mark the beginning and the end of each constituent.
data BracketedString
= Leaf Token -- ^ this is the leaf i.e. a single token
- | Bracket CId {-# UNPACK #-} !FId {-# UNPACK #-} !LIndex CId [Expr] [BracketedString]
+ | Bracket CId {-# UNPACK #-} !FId {-# UNPACK #-} !FId {-# UNPACK #-} !LIndex CId [Expr] [BracketedString]
-- ^ this is a bracket. The 'CId' is the category of
-- the phrase. The 'FId' is an unique identifier for
-- every phrase in the sentence. For context-free grammars
@@ -151,7 +151,7 @@ data BracketedString
-- that represents the same constituent.
data BracketedTokn
- = Bracket_ CId {-# UNPACK #-} !FId {-# UNPACK #-} !LIndex CId [Expr] [BracketedTokn] -- Invariant: the list is not empty
+ = Bracket_ CId {-# UNPACK #-} !FId {-# UNPACK #-} !FId {-# UNPACK #-} !LIndex CId [Expr] [BracketedTokn] -- Invariant: the list is not empty
| LeafKS Token
| LeafNE
| LeafBIND
@@ -169,12 +169,12 @@ showBracketedString :: BracketedString -> String
showBracketedString = render . ppBracketedString
ppBracketedString (Leaf t) = text t
-ppBracketedString (Bracket cat fid index _ _ bss) = parens (ppCId cat <> colon <> int fid <+> hsep (map ppBracketedString bss))
+ppBracketedString (Bracket cat fid fid' index _ _ bss) = parens (ppCId cat <> colon <> int fid <+> hsep (map ppBracketedString bss))
-- | The length of the bracketed string in number of tokens.
lengthBracketedString :: BracketedString -> Int
-lengthBracketedString (Leaf _) = 1
-lengthBracketedString (Bracket _ _ _ _ _ bss) = sum (map lengthBracketedString bss)
+lengthBracketedString (Leaf _) = 1
+lengthBracketedString (Bracket _ _ _ _ _ _ bss) = sum (map lengthBracketedString bss)
untokn :: Maybe String -> [BracketedTokn] -> (Maybe String,[BracketedString])
untokn nw bss =
@@ -183,10 +183,10 @@ untokn nw bss =
Just bss -> (nw,concat bss)
Nothing -> (nw,[])
where
- untokn nw (Bracket_ cat fid index fun es bss) =
+ untokn nw (Bracket_ cat fid fid' index fun es bss) =
let (nw',bss') = mapAccumR untokn nw bss
in case sequence bss' of
- Just bss -> (nw',Just [Bracket cat fid index fun es (concat bss)])
+ Just bss -> (nw',Just [Bracket cat fid fid' index fun es (concat bss)])
Nothing -> (Nothing, Nothing)
untokn nw (LeafKS t)
| null t = (nw,Just [])
@@ -227,16 +227,16 @@ computeSeq filter seq args = concatMap compute seq
getArg d r
| not (null arg_lin) &&
- filter ct = [Bracket_ cat fid r fun es arg_lin]
+ filter ct = [Bracket_ cat fid fid' r fun es arg_lin]
| otherwise = arg_lin
where
- arg_lin = lin ! r
- (ct@(cat,fid),_,fun,es,(_xs,lin)) = args !! d
+ arg_lin = lin ! r
+ (ct@(cat,fid),fid',fun,es,(_xs,lin)) = args !! d
getVar d r = [LeafKS (showCId (xs !! r))]
where
(_ct,_,_fun,_es,(xs,_lin)) = args !! d
flattenBracketedString :: BracketedString -> [String]
-flattenBracketedString (Leaf w) = [w]
-flattenBracketedString (Bracket _ _ _ _ _ bss) = concatMap flattenBracketedString bss
+flattenBracketedString (Leaf w) = [w]
+flattenBracketedString (Bracket _ _ _ _ _ _ bss) = concatMap flattenBracketedString bss
diff --git a/src/runtime/haskell/PGF/Parse.hs b/src/runtime/haskell/PGF/Parse.hs
index 51b1d3273..d4df937db 100644
--- a/src/runtime/haskell/PGF/Parse.hs
+++ b/src/runtime/haskell/PGF/Parse.hs
@@ -198,7 +198,7 @@ recoveryStates open_types (EState abs cnc chart) =
Nothing -> []
complete open_fcats items ac =
- foldl (Set.fold (\(Active j' ppos funid seqid args keyc) ->
+ foldl (Set.foldr (\(Active j' ppos funid seqid args keyc) ->
(:) (Active j' (ppos+1) funid seqid args keyc)))
items
[set | fcat <- open_fcats, (set,_) <- lookupACByFCat fcat ac]
@@ -363,7 +363,7 @@ process flit ftok cnc (item@(Active j ppos funid seqid args key0):items) acc cha
items2 = case lookupAC key0 ((active chart:actives chart) !! (k-j)) of
Nothing -> items
- Just (set,sc) -> Set.fold (\(Active j' ppos funid seqid args keyc) ->
+ Just (set,sc) -> Set.foldr (\(Active j' ppos funid seqid args keyc) ->
let SymCat d _ = unsafeAt (unsafeAt (sequences cnc) seqid) ppos
PArg hypos _ = args !! d
in (:) (Active j' (ppos+1) funid seqid (updateAt d (PArg hypos fid) args) keyc)) items set
@@ -395,7 +395,7 @@ process flit ftok cnc (item@(Active j ppos funid seqid args key0):items) acc cha
predict flit ftok cnc forest key0 key@(AK fid lbl) k acc items =
let (acc1,items1) = case IntMap.lookup fid forest of
Nothing -> (acc,items)
- Just set -> Set.fold foldProd (acc,items) set
+ Just set -> Set.foldr foldProd (acc,items) set
(acc2,items2) = case IntMap.lookup fid (lexicon cnc) >>= IntMap.lookup lbl of
Just tmap -> let (mb_v,toks) = TrieMap.decompose (TrieMap.map (toItems key0 k) tmap)
diff --git a/src/runtime/haskell/PGF/TrieMap.hs b/src/runtime/haskell/PGF/TrieMap.hs
index f0383941a..fbf6ea26e 100644
--- a/src/runtime/haskell/PGF/TrieMap.hs
+++ b/src/runtime/haskell/PGF/TrieMap.hs
@@ -79,12 +79,12 @@ unionsWith f = foldl (unionWith f) empty
elems :: TrieMap k v -> [v]
elems tr = collect tr []
where
- collect (Tr mb_v m) xs = maybe id (:) mb_v (Map.fold collect xs m)
+ collect (Tr mb_v m) xs = maybe id (:) mb_v (Map.foldr collect xs m)
toList :: TrieMap k v -> [([k],v)]
toList tr = collect [] tr []
where
- collect ks (Tr mb_v m) xs = maybe id (\v -> (:) (ks,v)) mb_v (Map.foldWithKey (\k -> collect (k:ks)) xs m)
+ collect ks (Tr mb_v m) xs = maybe id (\v -> (:) (ks,v)) mb_v (Map.foldrWithKey (\k -> collect (k:ks)) xs m)
fromListWith :: Ord k => (v -> v -> v) -> [([k],v)] -> TrieMap k v
fromListWith f xs = foldl' (\trie (ks,v) -> insertWith f ks v trie) empty xs
diff --git a/src/runtime/haskell/PGF/VisualizeTree.hs b/src/runtime/haskell/PGF/VisualizeTree.hs
index 5d884fafe..e27ad080b 100644
--- a/src/runtime/haskell/PGF/VisualizeTree.hs
+++ b/src/runtime/haskell/PGF/VisualizeTree.hs
@@ -34,8 +34,9 @@ import PGF.Macros (lookValCat, BracketedString(..))
import qualified Data.Map as Map
--import qualified Data.IntMap as IntMap
-import Data.List (intersperse,nub,mapAccumL,find,groupBy)
---import Data.Char (isDigit)
+import Data.List (intersperse,nub,mapAccumL,find,groupBy,sortBy,partition)
+import Data.Ord (comparing)
+import Data.Char (isDigit)
import Data.Maybe (fromMaybe)
import Text.PrettyPrint
@@ -131,6 +132,7 @@ graphvizDependencyTree format debug mlab mclab pgf lang t =
"latex" -> render . ppLaTeX $ conll2latex' conll
"svg" -> render . ppSVG . toSVG $ conll2latex' conll
"conll" -> printCoNLL conll
+ "conllu" -> printCoNLL ([["# text = " ++ linearize pgf lang t], ["# tree = " ++ showExpr [] t]] ++ conll)
"malt_tab" -> render $ vcat (map (hcat . intersperse (char '\t') . (\ws -> [ws !! 0,ws !! 1,ws !! 3,ws !! 6,ws !! 7])) wnodes)
"malt_input" -> render $ vcat (map (hcat . intersperse (char '\t') . take 6) wnodes)
_ -> render $ text "digraph {" $$
@@ -144,16 +146,16 @@ graphvizDependencyTree format debug mlab mclab pgf lang t =
conll = maybe conll0 (\ls -> fixCoNLL ls conll0) mclab
conll0 = (map.map) render wnodes
nodes = map mkNode leaves
- links = map mkLink [(fid, fromMaybe (dep_lbl,nil) (lookup fid deps)) | ((cat,fid,fun),_,w) <- tail leaves]
+ links = map mkLink [(fid, fromMaybe (dep_lbl,nil) (lookup fid deps)) | ((cat,fid,fun,_),_,w) <- tail leaves]
-- CoNLL format: ID FORM LEMMA PLEMMA POS PPOS FEAT PFEAT HEAD PHEAD DEPREL PDEPREL
-- P variants are automatically predicted rather than gold standard
- wnodes = [[int i, maltws ws, ppCId fun, ppCId (posCat cat), ppCId cat, unspec, int parent, text lab, unspec, unspec] |
- ((cat,fid,fun),i,ws) <- tail leaves,
+ wnodes = [[int i, maltws ws, ppCId fun, ppCId (posCat cat), ppCId cat, int lind, int parent, text lab, unspec, unspec] |
+ ((cat,fid,fun,lind),i,ws) <- tail leaves,
let (lab,parent) = fromMaybe (dep_lbl,0)
(do (lbl,fid) <- lookup fid deps
- (_,i,_) <- find (\((_,fid1,_),i,_) -> fid == fid1) leaves
+ (_,i,_) <- find (\((_,fid1,_,_),i,_) -> fid == fid1) leaves
return (lbl,i))
]
maltws = text . concat . intersperse "+" . words -- no spaces in column 2
@@ -162,7 +164,7 @@ graphvizDependencyTree format debug mlab mclab pgf lang t =
bss = bracketedLinearize pgf lang t
- root = (wildCId,nil,wildCId)
+ root = (wildCId,nil,wildCId,0)
leaves = (root,0,root_lbl) : (groupAndIndexIt 1 . concatMap (getLeaves root)) bss
deps = let (_,(h,deps)) = getDeps 0 [] t []
@@ -180,10 +182,10 @@ graphvizDependencyTree format debug mlab mclab pgf lang t =
getLeaves parent bs =
case bs of
- Leaf w -> [(parent,w)]
- Bracket cat fid _ fun _ bss -> concatMap (getLeaves (cat,fid,fun)) bss
+ Leaf w -> [(parent,w)]
+ Bracket cat fid _ lind fun _ bss -> concatMap (getLeaves (cat,fid,fun,lind)) bss
- mkNode ((_,p,_),i,w) =
+ mkNode ((_,p,_,_),i,w) =
tag p <+> brackets (text "label = " <> doubleQuotes (int i <> char '.' <+> text w)) <+> semi
mkLink (x,(lbl,y)) = tag y <+> text "->" <+> tag x <+> text "[label = " <> doubleQuotes (text lbl) <> text "] ;"
@@ -234,10 +236,18 @@ graphvizDependencyTree format debug mlab mclab pgf lang t =
root_lbl = "ROOT"
unspec = text "_"
+-- auxiliaries for UD conversion PK 15/12/2018
+rmcomments :: String -> String
+rmcomments [] = []
+rmcomments ('-':'-':xs) = []
+rmcomments ('-':x :xs) = '-':rmcomments (x:xs)
+rmcomments (x:xs) = x:rmcomments xs
+
-- | Prepare lines obtained from a configuration file for labels for
-- use with 'graphvizDependencyTree'. Format per line /fun/ /label/@*@.
getDepLabels :: String -> Labels
-getDepLabels s = Map.fromList [(mkCId f,ls) | f:ls <- map words (lines s)]
+-- getDepLabels s = Map.fromList [(mkCId f,ls) | f:ls <- map words (lines s)]
+getDepLabels s = Map.fromList [(mkCId f,ls) | f:ls <- map (words . rmcomments) (lines s)]
-- the old function, without dependencies
graphvizParseTree :: PGF -> Language -> GraphvizOptions -> Tree -> String
@@ -291,13 +301,13 @@ graphvizBracketedString opts mbl tree bss = render graphviz_code
getInternals [] = []
getInternals nodes
= nub [(parent, fid, mkNode fun cat) |
- (parent, Bracket cat fid _ fun _ _) <- nodes]
+ (parent, Bracket cat fid _ _ fun _ _) <- nodes]
: getInternals [(fid, child) |
- (_, Bracket _ fid _ _ _ children) <- nodes,
+ (_, Bracket _ fid _ _ _ _ children) <- nodes,
child <- children]
getLeaves cat parent (Leaf word) = [(parent, (cat, word))] -- the lowest cat before the word
- getLeaves _ parent (Bracket cat fid i _ _ children)
+ getLeaves _ parent (Bracket cat fid _ i _ _ children)
= concatMap (getLeaves cat fid) children
mkLevel nodes
@@ -401,8 +411,8 @@ genPreAlignment pgf langs = lin2align . linsBracketed
getLeaves parent bs =
case bs of
- Leaf w -> [(parent,w)]
- Bracket _ fid _ _ _ bss -> concatMap (getLeaves fid) bss
+ Leaf w -> [(parent,w)]
+ Bracket _ fid _ _ _ _ bss -> concatMap (getLeaves fid) bss
mkLayers (cs:css:rest) = let (lrest, rrest) = mkLayers (css:rest)
in ((fields cs) : lrest, (map (mkLinks css) cs) : rrest)
@@ -512,7 +522,7 @@ conll2latex' = dep2latex . conll2dep'
data Dep = Dep {
wordLength :: Int -> Double -- length of word at position int -- was: fixed width, millimetres (>= 20.0)
- , tokens :: [(String,String)] -- word, pos (0..)
+ , tokens :: [(String,(String,String))] -- word, (pos,features) (0..)
, deps :: [((Int,Int),String)] -- from, to, label
, root :: Int -- root word position
}
@@ -552,7 +562,8 @@ dep2latex d =
[Comment (unwords (map fst (tokens d))),
Picture defaultUnit (width,height) (
[Put (wpos rwld i,0) (Text w) | (i,w) <- zip [0..] (map fst (tokens d))] -- words
- ++ [Put (wpos rwld i,15) (TinyText w) | (i,w) <- zip [0..] (map snd (tokens d))] -- pos tags 15u above bottom
+ ++ [Put (wpos rwld i,15) (TinyText w) | (i,(w,_)) <- zip [0..] (map snd (tokens d))] -- pos tags 15u above bottom
+--- ++ [Put (wpos rwld i,-15) (TinyText w) | (i,(_,w)) <- zip [0..] (map snd (tokens d))] -- features 15u below bottom -> DON'T SHOW
++ concat [putArc rwld (aheight x y) x y label | ((x,y),label) <- deps d] -- arcs and labels
++ [Put (wpos rwld (root d) + 15,height) (ArrowDown (height-arcbase))]
++ [Put (wpos rwld (root d) + 20,height - 10) (TinyText "ROOT")]
@@ -583,8 +594,8 @@ conll2dep' ls = Dep {
, root = head $ [read x-1 | x:_:_:_:_:_:"0":_ <- ls] ++ [1]
}
where
- wld i = maximum (0:[charWidth * fromIntegral (length w) | w <- let (tok,pos) = toks !! i in [tok,pos]])
- toks = [(w,c) | _:w:_:c:_ <- ls]
+ wld i = maximum (0:[charWidth * fromIntegral (length w) | w <- let (tok,(pos,feat)) = toks !! i in [tok,pos {-,feat-}]]) --- feat not shown
+ toks = [(w,(c,m)) | _:w:_:c:_:m:_ <- ls]
dps = [((read y-1, read x-1),lab) | x:_:_:_:_:_:y:lab:_ <- ls, y /="0"]
--maxdist = maximum [abs (x-y) | ((x,y),_) <- dps]
@@ -749,18 +760,26 @@ ppSVG svg =
-- UseComp {"not"} PART neg head
-- UseComp {*} AUX cop head
-type CncLabels = [(String, String -> Maybe (String -> String,String,String))]
--- (fun, word -> (pos,label,target))
--- the pos can remain unchanged, as in the current notation in the article
+type CncLabels = [
+ Either
+ (String, String -> Maybe (String -> String,String,String))
+ -- (fun, word -> (pos,label,target))
+ -- the pos can remain unchanged, as in the current notation in the article
+ (String,[String])
+ -- (category, morphological forms)
+ ]
fixCoNLL :: CncLabels -> CoNLL -> CoNLL
-fixCoNLL labels conll = map fixc conll where
+fixCoNLL cncLabels conll = map fixc conll where
+ labels = [l | Left l <- cncLabels]
+ flabels = [r | Right r <- cncLabels]
+
fixc row = case row of
- (i:word:fun:pos:cat:x_:"0":"dep":xs) -> (i:word:fun:pos:cat:x_:"0":"root":xs) --- change the root label from dep to root
+ (i:word:fun:pos:cat:x_:"0":"dep":xs) -> (i:word:fun:pos:cat:(feat cat word x_):"0":"root":xs) --- change the root label from dep to root
(i:word:fun:pos:cat:x_:j:label:xs) -> case look (fun,word) of
- Just (pos',label',"head") -> (i:word:fun:pos' pos:cat:x_:j :label':xs)
- Just (pos',label',target) -> (i:word:fun:pos' pos:cat:x_: getDep j target:label':xs)
- _ -> row
+ Just (pos',label',"head") -> (i:word:fun:pos' pos:cat:(feat cat word x_):j :label':xs)
+ Just (pos',label',target) -> (i:word:fun:pos' pos:cat:(feat cat word x_): getDep j target:label':xs)
+ _ -> (i:word:fun:pos:cat:(feat cat word x_):j:label:xs)
_ -> row
look (fun,word) = case lookup fun labels of
@@ -775,16 +794,48 @@ fixCoNLL labels conll = map fixc conll where
getDep j label = maybe j id $ lookup (label,j) [((label,j),i) | i:word:fun:pos:cat:x_:j:label:xs <- conll]
+ feat cat word x = case lookup cat flabels of
+ Just tags | all isDigit x && length tags > read x -> tags !! read x
+ _ -> case lookup (show word) flabels of
+ Just (t:_) -> t
+ _ -> cat ++ "-" ++ x
+
getCncDepLabels :: String -> CncLabels
-getCncDepLabels = map merge . groupBy (\ (x,_) (a,_) -> x == a) . concatMap analyse . filter choose . lines where
+getCncDepLabels s = wlabels ws ++ flabels fs
+ where
+ wlabels =
+ map Left .
+ map merge .
+ groupBy (\ (x,_) (a,_) -> x == a) .
+ sortBy (comparing fst) .
+ concatMap analyse .
+ filter chooseW
+
+ flabels =
+ map Right .
+ map collectTags .
+ map words
+
+ (fs,ws) = partition chooseF $ map uncomment $ lines s
+
--- choose is for compatibility with the general notation
- choose line = notElem '(' line && elem '{' line --- ignoring non-local (with "(") and abstract (without "{") rules
-
+ chooseW line = notElem '(' line &&
+ elem '{' line
+ --- ignoring non-local (with "(") and abstract (without "{") rules
+ ---- TODO: this means that "(" cannot be a token
+
+ chooseF line = take 1 line == "@" --- feature assignments have the form e.g. @N SgNom SgGen ; no spaces inside tags
+
+ uncomment line = case line of
+ '-':'-':_ -> ""
+ c:cs -> c : uncomment cs
+ _ -> line
+
analyse line = case break (=='{') line of
(beg,_:ws) -> case break (=='}') ws of
- (toks,_:target) -> case (words beg, words target) of
- (fun:_,[ label,j]) -> [(fun, (tok, (id, label,j))) | tok <- getToks toks]
- (fun:_,[pos,label,j]) -> [(fun, (tok, (const pos,label,j))) | tok <- getToks toks]
+ (toks,_:target) -> case (getToks beg, words target) of
+ (funs,[ label,j]) -> [(fun, (tok, (id, label,j))) | fun <- funs, tok <- getToks toks]
+ (funs,[pos,label,j]) -> [(fun, (tok, (const pos,label,j))) | fun <- funs, tok <- getToks toks]
_ -> []
_ -> []
_ -> []
@@ -793,8 +844,13 @@ getCncDepLabels = map merge . groupBy (\ (x,_) (a,_) -> x == a) . concatMap ana
Just new -> return new
_ -> lookup "*" (map snd rules)
)
- getToks = words . map (\c -> if elem c "\"," then ' ' else c)
+ getToks = map unquote . filter (/=",") . toks
+ toks s = case lex s of [(t,"")] -> [t] ; [(t,cc)] -> t:toks cc ; _ -> []
+ unquote s = case s of '"':cc@(_:_) | last cc == '"' -> init cc ; _ -> s
+ collectTags (w:ws) = (tail w,ws)
+
+-- added init to remove the last \n. otherwise, two empty lines are in between each sentence PK 17/12/2018
printCoNLL :: CoNLL -> String
-printCoNLL = unlines . map (concat . intersperse "\t")
+printCoNLL = init . unlines . map (concat . intersperse "\t")
diff --git a/src/runtime/haskell/pgf.cabal b/src/runtime/haskell/pgf.cabal
index d3146e9d4..1d11b007f 100644
--- a/src/runtime/haskell/pgf.cabal
+++ b/src/runtime/haskell/pgf.cabal
@@ -1,5 +1,5 @@
name: pgf
-version: 3.9-git
+version: 3.10
cabal-version: >= 1.20
build-type: Simple
@@ -12,11 +12,6 @@ bug-reports: https://github.com/GrammaticalFramework/GF/issues
maintainer: Thomas Hallgren
tested-with: GHC==7.6.3, GHC==7.8.3, GHC==7.10.3, GHC==8.0.2
-flag custom-binary
- Description: Use a customised version of the binary package
- Default: True
- Manual: True
-
Library
default-language: Haskell2010
build-depends: base >= 4.6 && <5,
@@ -29,18 +24,14 @@ Library
mtl,
exceptions
- if flag(custom-binary)
- hs-source-dirs: ., binary
- other-modules:
- -- not really part of GF but I have changed the original binary library
- -- and we have to keep the copy for now.
- Data.Binary
- Data.Binary.Put
- Data.Binary.Get
- Data.Binary.Builder
- Data.Binary.IEEE754
- else
- build-depends: binary, data-binary-ieee754
+ other-modules:
+ -- not really part of GF but I have changed the original binary library
+ -- and we have to keep the copy for now.
+ Data.Binary
+ Data.Binary.Put
+ Data.Binary.Get
+ Data.Binary.Builder
+ Data.Binary.IEEE754
--ghc-options: -fwarn-unused-imports
--if impl(ghc>=7.8)
diff --git a/src/runtime/java/Makefile b/src/runtime/java/Makefile
index d5a25a2f6..4af682699 100644
--- a/src/runtime/java/Makefile
+++ b/src/runtime/java/Makefile
@@ -1,29 +1,37 @@
+INSTALL_PATH = /usr/local
+
C_SOURCES = jpgf.c jsg.c jni_utils.c
JAVA_SOURCES = $(wildcard org/grammaticalframework/pgf/*.java) \
$(wildcard org/grammaticalframework/sg/*.java)
JNI_INCLUDES = $(if $(wildcard /usr/lib/jvm/default-java/include/.*), -I/usr/lib/jvm/default-java/include -I/usr/lib/jvm/default-java/include/linux, \
- $(if $(wildcard /System/Library/Frameworks/JavaVM.framework/Versions/A/Headers/.*), -I/System/Library/Frameworks/JavaVM.framework/Versions/A/Headers, \
- $(if $(wildcard /Library/Java/Home/include/.*), -I/Library/Java/Home/include/ -I/Library/Java/Home/include/darwin, \
- $(error No JNI headers found))))
+ $(if $(wildcard /usr/lib/jvm/java-1.11.0-openjdk-amd64/include/.*), -I/usr/lib/jvm/java-1.11.0-openjdk-amd64/include/ -I/usr/lib/jvm/java-1.11.0-openjdk-amd64/include/linux, \
+ $(if $(wildcard /System/Library/Frameworks/JavaVM.framework/Versions/A/Headers/.*), -I/System/Library/Frameworks/JavaVM.framework/Versions/A/Headers, \
+ $(if $(wildcard /Library/Java/Home/include/.*), -I/Library/Java/Home/include/ -I/Library/Java/Home/include/darwin, \
+ $(error No JNI headers found)))))
-# For Windows replace the previous line with something like this:
+# For compilation on Windows replace the previous line with something like this:
#
# JNI_INCLUDES = -I "C:/Program Files/Java/jdk1.8.0_171/include" -I "C:/Program Files/Java/jdk1.8.0_171/include/win32" -I "C:/MinGW/msys/1.0/local/include"
-# WINDOWS_FLAGS = -L"C:/MinGW/msys/1.0/local/lib" -no-undefined
+# WINDOWS_LDFLAGS = -L"C:/MinGW/msys/1.0/local/lib" -no-undefined
-INSTALL_PATH = /usr/local/lib
-LIBTOOL = glibtool --tag=CC
+GCC = gcc
+LIBTOOL = $(if $(shell command -v glibtool 2>/dev/null), glibtool, libtool) --tag=CC
-LIBTOOL = $(if $(shell command -v glibtool 2>/dev/null), glibtool --tag=CC, libtool)
+# For cross-compilation from Linux to Windows replace the previous two lines with:
+#
+# GCC = x86_64-w64-mingw32-gcc
+# LIBTOOL = ../c/libtool
+# WINDOWS_CCFLAGS = -I$(INSTALL_PATH)/include
+# WINDOWS_LDFLAGS = -L$(INSTALL_PATH)/lib -no-undefined
all: libjpgf.la jpgf.jar
libjpgf.la: $(patsubst %.c, %.lo, $(C_SOURCES))
- $(LIBTOOL) --mode=link gcc $(CFLAGS) -g -O -o libjpgf.la -shared $^ -rpath $(INSTALL_PATH) -lgu -lpgf -lsg $(WINDOWS_FLAGS)
+ $(LIBTOOL) --mode=link $(GCC) $(CFLAGS) -g -O -o libjpgf.la -shared $^ -rpath $(INSTALL_PATH)/lib -lgu -lpgf -lsg $(WINDOWS_LDFLAGS)
%.lo : %.c
- $(LIBTOOL) --mode=compile gcc $(CFLAGS) -g -O -c $(JNI_INCLUDES) -std=c99 -shared $< -o $@
+ $(LIBTOOL) --mode=compile $(GCC) $(CFLAGS) -g -O -c $(JNI_INCLUDES) $(WINDOWS_CCFLAGS) -std=c99 -shared $< -o $@
jpgf.jar: $(patsubst %.java, %.class, $(JAVA_SOURCES))
jar -cf $@ org/grammaticalframework/pgf/*.class org/grammaticalframework/sg/*.class
@@ -32,8 +40,8 @@ jpgf.jar: $(patsubst %.java, %.class, $(JAVA_SOURCES))
javac $<
install: libjpgf.la jpgf.jar
- $(LIBTOOL) --mode=install install -s libjpgf.la $(INSTALL_PATH)
- install jpgf.jar $(INSTALL_PATH)
+ $(LIBTOOL) --mode=install install -s libjpgf.la $(INSTALL_PATH)/lib
+ install jpgf.jar $(INSTALL_PATH)/lib
doc:
diff --git a/src/runtime/javascript/DEPRECATED.md b/src/runtime/javascript/DEPRECATED.md
new file mode 100644
index 000000000..83f4e51d6
--- /dev/null
+++ b/src/runtime/javascript/DEPRECATED.md
@@ -0,0 +1,4 @@
+# Deprecation notice
+
+As of June 2019, this JavaScript version of the GF runtime is considered deprecated,
+in favour of the TypeScript version in .
diff --git a/src/runtime/javascript/gflib.js b/src/runtime/javascript/gflib.js
index 97e98aab2..0dc5a2ff3 100644
--- a/src/runtime/javascript/gflib.js
+++ b/src/runtime/javascript/gflib.js
@@ -1,38 +1,38 @@
function GFGrammar(abstract, concretes) {
- this.abstract = abstract;
- this.concretes = concretes;
+ this.abstract = abstract;
+ this.concretes = concretes;
}
-/* Translates a string from any concrete syntax to all concrete syntaxes.
+/* Translates a string from any concrete syntax to all concrete syntaxes.
Uses the start category of the grammar.
*/
GFGrammar.prototype.translate = function (input, fromLang, toLang) {
- var outputs = new Object();
- var fromConcs = this.concretes;
- if (fromLang) {
- fromConcs = new Object();
- fromConcs[fromLang] = this.concretes[fromLang];
- }
- var toConcs = this.concretes;
- if (toLang) {
- toConcs = new Object();
- toConcs[toLang] = this.concretes[toLang];
- }
- for (var c1 in fromConcs) {
- var concrete = this.concretes[c1];
- var trees = concrete.parseString(input, this.abstract.startcat);
- if (trees.length > 0) {
- outputs[c1] = new Array();
- for (var i in trees) {
- outputs[c1][i] = new Object();
- for (var c2 in toConcs) {
- outputs[c1][i][c2] = this.concretes[c2].linearize(trees[i]);
- }
- }
- }
- }
- return outputs;
+ var outputs = new Object();
+ var fromConcs = this.concretes;
+ if (fromLang) {
+ fromConcs = new Object();
+ fromConcs[fromLang] = this.concretes[fromLang];
+ }
+ var toConcs = this.concretes;
+ if (toLang) {
+ toConcs = new Object();
+ toConcs[toLang] = this.concretes[toLang];
+ }
+ for (var c1 in fromConcs) {
+ var concrete = this.concretes[c1];
+ var trees = concrete.parseString(input, this.abstract.startcat);
+ if (trees.length > 0) {
+ outputs[c1] = new Array();
+ for (var i in trees) {
+ outputs[c1][i] = new Object();
+ for (var c2 in toConcs) {
+ outputs[c1][i][c2] = this.concretes[c2].linearize(trees[i]);
+ }
+ }
+ }
+ }
+ return outputs;
}
@@ -47,56 +47,56 @@ String.prototype.setTag = function (tag) { this.tag = tag; };
/* Abstract syntax trees */
function Fun(name) {
- this.name = name;
- this.args = new Array();
- for (var i = 1; i < arguments.length; i++) {
- this.args[i-1] = arguments[i];
- }
+ this.name = name;
+ this.args = new Array();
+ for (var i = 1; i < arguments.length; i++) {
+ this.args[i-1] = arguments[i];
+ }
}
Fun.prototype.print = function () { return this.show(0); } ;
Fun.prototype.show = function (prec) {
- if (this.isMeta()) {
- if (isUndefined(this.type)) {
- return '?';
- } else {
- var s = '?:' + this.type;
- if (prec > 0) {
- s = "(" + s + ")" ;
- }
- return s;
- }
- } else {
- var s = this.name;
- var cs = this.args;
- for (var i in cs) {
- s += " " + (isUndefined(cs[i]) ? "undefined" : cs[i].show(1));
- }
- if (prec > 0 && cs.length > 0) {
- s = "(" + s + ")" ;
- }
- return s;
- }
+ if (this.isMeta()) {
+ if (isUndefined(this.type)) {
+ return '?';
+ } else {
+ var s = '?:' + this.type;
+ if (prec > 0) {
+ s = "(" + s + ")" ;
+ }
+ return s;
+ }
+ } else {
+ var s = this.name;
+ var cs = this.args;
+ for (var i in cs) {
+ s += " " + (isUndefined(cs[i]) ? "undefined" : cs[i].show(1));
+ }
+ if (prec > 0 && cs.length > 0) {
+ s = "(" + s + ")" ;
+ }
+ return s;
+ }
};
Fun.prototype.getArg = function (i) {
- return this.args[i];
+ return this.args[i];
};
Fun.prototype.setArg = function (i,c) {
- this.args[i] = c;
+ this.args[i] = c;
};
Fun.prototype.isMeta = function() {
- return this.name == '?';
+ return this.name == '?';
} ;
Fun.prototype.isComplete = function() {
- if (this.isMeta()) {
- return false;
- } else {
- for (var i in this.args) {
- if (!this.args[i].isComplete()) {
- return false;
- }
- }
- return true;
- }
+ if (this.isMeta()) {
+ return false;
+ } else {
+ for (var i in this.args) {
+ if (!this.args[i].isComplete()) {
+ return false;
+ }
+ }
+ return true;
+ }
} ;
Fun.prototype.isLiteral = function() {
return (/^[\"\-\d]/).test(this.name);
@@ -120,146 +120,146 @@ Fun.prototype.isEqual = function(obj) {
if (!this.args[i].isEqual(obj.args[i]))
return false;
}
-
+
return true;
}
/* Type annotation */
function GFAbstract(startcat, types) {
- this.startcat = startcat;
- this.types = types;
+ this.startcat = startcat;
+ this.types = types;
}
GFAbstract.prototype.addType = function(fun, args, cat) {
- this.types[fun] = new Type(args, cat);
+ this.types[fun] = new Type(args, cat);
} ;
GFAbstract.prototype.getArgs = function(fun) {
- return this.types[fun].args;
+ return this.types[fun].args;
}
GFAbstract.prototype.getCat = function(fun) {
- return this.types[fun].cat;
+ return this.types[fun].cat;
};
GFAbstract.prototype.annotate = function(tree, type) {
- if (tree.name == '?') {
- tree.type = type;
- } else {
- var typ = this.types[tree.name];
- for (var i in tree.args) {
- this.annotate(tree.args[i], typ.args[i]);
- }
- }
- return tree;
+ if (tree.name == '?') {
+ tree.type = type;
+ } else {
+ var typ = this.types[tree.name];
+ for (var i in tree.args) {
+ this.annotate(tree.args[i], typ.args[i]);
+ }
+ }
+ return tree;
} ;
GFAbstract.prototype.handleLiterals = function(tree, type) {
- if (tree.name != '?') {
- if (type == "String" || type == "Int" || type == "Float") {
- tree.name = type + "_Literal_" + tree.name;
- } else {
- var typ = this.types[tree.name];
- for (var i in tree.args) {
- this.handleLiterals(tree.args[i], typ.args[i]);
- }
- }
- }
- return tree;
+ if (tree.name != '?') {
+ if (type == "String" || type == "Int" || type == "Float") {
+ tree.name = type + "_Literal_" + tree.name;
+ } else {
+ var typ = this.types[tree.name];
+ for (var i in tree.args) {
+ this.handleLiterals(tree.args[i], typ.args[i]);
+ }
+ }
+ }
+ return tree;
} ;
/* Hack to get around the fact that our SISR doesn't build real Fun objects. */
GFAbstract.prototype.copyTree = function(x) {
- var t = new Fun(x.name);
- if (!isUndefined(x.type)) {
- t.type = x.type;
- }
- var cs = x.args;
- if (!isUndefined(cs)) {
- for (var i in cs) {
- t.setArg(i, this.copyTree(cs[i]));
- }
- }
- return t;
+ var t = new Fun(x.name);
+ if (!isUndefined(x.type)) {
+ t.type = x.type;
+ }
+ var cs = x.args;
+ if (!isUndefined(cs)) {
+ for (var i in cs) {
+ t.setArg(i, this.copyTree(cs[i]));
+ }
+ }
+ return t;
} ;
-GFAbstract.prototype.parseTree = function(str, type) {
- return this.annotate(this.parseTree_(str.match(/[\w\'\.\"]+|\(|\)|\?|\:/g), 0), type);
+GFAbstract.prototype.parseTree = function(str, type) {
+ return this.annotate(this.parseTree_(str.match(/[\w\'\.\"]+|\(|\)|\?|\:/g), 0), type);
} ;
GFAbstract.prototype.parseTree_ = function(tokens, prec) {
- if (tokens.length == 0 || tokens[0] == ")") { return null; }
- var t = tokens.shift();
- if (t == "(") {
- var tree = this.parseTree_(tokens, 0);
- tokens.shift();
- return tree;
- } else if (t == '?') {
- var tree = this.parseTree_(tokens, 0);
- return new Fun('?');
- } else {
- var tree = new Fun(t);
- if (prec == 0) {
- var c, i;
- for (i = 0; (c = this.parseTree_(tokens, 1)) !== null; i++) {
- tree.setArg(i,c);
- }
- }
- return tree;
- }
+ if (tokens.length == 0 || tokens[0] == ")") { return null; }
+ var t = tokens.shift();
+ if (t == "(") {
+ var tree = this.parseTree_(tokens, 0);
+ tokens.shift();
+ return tree;
+ } else if (t == '?') {
+ var tree = this.parseTree_(tokens, 0);
+ return new Fun('?');
+ } else {
+ var tree = new Fun(t);
+ if (prec == 0) {
+ var c, i;
+ for (i = 0; (c = this.parseTree_(tokens, 1)) !== null; i++) {
+ tree.setArg(i,c);
+ }
+ }
+ return tree;
+ }
} ;
function Type(args, cat) {
- this.args = args;
- this.cat = cat;
+ this.args = args;
+ this.cat = cat;
}
/* Linearization */
function GFConcrete(flags, productions, functions, sequences, startCats, totalFIds) {
- this.flags = flags;
- this.productions = productions;
- this.functions = functions;
- this.sequences = sequences;
- this.startCats = startCats;
- this.totalFIds = totalFIds;
+ this.flags = flags;
+ this.productions = productions;
+ this.functions = functions;
+ this.sequences = sequences;
+ this.startCats = startCats;
+ this.totalFIds = totalFIds;
- this.pproductions = productions;
- this.lproductions = new Object();
+ this.pproductions = productions;
+ this.lproductions = new Object();
for (var fid in productions) {
for (var i in productions[fid]) {
var rule = productions[fid][i];
-
+
if (rule.id == "Apply") {
var fun = this.functions[rule.fun];
var lproductions = this.lproductions;
-
+
rule.fun = fun;
var register = function (args, key, i) {
- if (i < args.length) {
- var c = 0;
- var arg = args[i].fid;
-
- for (var k in productions[arg]) {
+ if (i < args.length) {
+ var c = 0;
+ var arg = args[i].fid;
+
+ for (var k in productions[arg]) {
var rule = productions[arg][k];
if (rule.id == "Coerce") {
register(args,key + "_" + rule.arg,i+1);
c++;
}
}
-
+
if (c == 0)
register(args,key + "_" + arg,i+1);
- } else {
- var set = lproductions[key];
+ } else {
+ var set = lproductions[key];
if (set == null) {
set = new Array();
lproductions[key] = set;
}
- set.push({fun: fun, fid: fid});
- }
- }
+ set.push({fun: fun, fid: fid});
+ }
+ }
register(rule.args,rule.fun.name,0);
- }
+ }
}
}
-
+
for (var i in functions) {
var fun = functions[i];
for (var j in fun.lins) {
@@ -267,30 +267,30 @@ function GFConcrete(flags, productions, functions, sequences, startCats, totalFI
}
}
}
-GFConcrete.prototype.linearizeSyms = function (tree, tag) {
+GFConcrete.prototype.linearizeSyms = function (tree, tag) {
var res = new Array();
-
+
if (tree.isString()) {
- var sym = new SymKS(tree.name);
- sym.tag = tag;
- res.push({fid: -1, table: [[sym]]});
+ var sym = new SymKS(tree.name);
+ sym.tag = tag;
+ res.push({fid: -1, table: [[sym]]});
} else if (tree.isInt()) {
- var sym = new SymKS(tree.name);
- sym.tag = tag;
- res.push({fid: -2, table: [[sym]]});
+ var sym = new SymKS(tree.name);
+ sym.tag = tag;
+ res.push({fid: -2, table: [[sym]]});
} else if (tree.isFloat()) {
- var sym = new SymKS(tree.name);
- sym.tag = tag;
- res.push({fid: -3, table: [[sym]]});
+ var sym = new SymKS(tree.name);
+ sym.tag = tag;
+ res.push({fid: -3, table: [[sym]]});
} else if (tree.isMeta()) {
- // TODO: Use lindef here
+ // TODO: Use lindef here
var cat = this.startCats[tree.type];
-
+
var sym = new SymKS(tree.name);
- sym.tag = tag;
-
- for (var fid = cat.s; fid <= cat.e; fid++) {
- res.push({fid: fid, table: [[sym]]});
+ sym.tag = tag;
+
+ for (var fid = cat.s; fid <= cat.e; fid++) {
+ res.push({fid: fid, table: [[sym]]});
}
} else {
var cs = new Array();
@@ -310,7 +310,7 @@ GFConcrete.prototype.linearizeSyms = function (tree, tag) {
var lin = rule.fun.lins[j];
var toks = new Array();
row.table[j] = toks;
-
+
for (var k in lin) {
var sym = lin[k];
switch (sym.id) {
@@ -331,7 +331,7 @@ GFConcrete.prototype.linearizeSyms = function (tree, tag) {
res.push(row);
}
}
-
+
return res;
};
GFConcrete.prototype.syms2toks = function (syms) {
@@ -347,7 +347,7 @@ GFConcrete.prototype.syms2toks = function (syms) {
case "KP":
for (var j in sym.tokens) {
ts.push(this.tagIt(sym.tokens[j],sym.tag));
- }
+ }
break;
}
}
@@ -365,37 +365,37 @@ GFConcrete.prototype.linearize = function (tree) {
var res = this.linearizeSyms(tree,"0");
return this.unlex(this.syms2toks(res[0].table[0]));
}
-GFConcrete.prototype.tagAndLinearize = function (tree) {
+GFConcrete.prototype.tagAndLinearize = function (tree) {
var res = this.linearizeSyms(tree,"0");
return this.syms2toks(res[0].table[0]);
}
GFConcrete.prototype.unlex = function (ts) {
- if (ts.length == 0) {
- return "";
- }
+ if (ts.length == 0) {
+ return "";
+ }
- var noSpaceAfter = /^[\(\-\[]/;
- var noSpaceBefore = /^[\.\,\?\!\)\:\;\-\]]/;
+ var noSpaceAfter = /^[\(\-\[]/;
+ var noSpaceBefore = /^[\.\,\?\!\)\:\;\-\]]/;
- var s = "";
- for (var i = 0; i < ts.length; i++) {
- var t = ts[i];
- var after = i < ts.length-1 ? ts[i+1] : null;
- s += t;
- if (after != null && !t.match(noSpaceAfter)
- && !after.match(noSpaceBefore)) {
- s += " ";
- }
- }
- return s;
+ var s = "";
+ for (var i = 0; i < ts.length; i++) {
+ var t = ts[i];
+ var after = i < ts.length-1 ? ts[i+1] : null;
+ s += t;
+ if (after != null && !t.match(noSpaceAfter)
+ && !after.match(noSpaceBefore)) {
+ s += " ";
+ }
+ }
+ return s;
};
GFConcrete.prototype.tagIt = function (obj, tag) {
if (isString(obj)) {
- var o = new String(obj);
- o.setTag(tag);
- return o;
+ var o = new String(obj);
+ o.setTag(tag);
+ return o;
} else {
- var me = arguments.callee;
+ var me = arguments.callee;
if (arguments.length == 2) {
me.prototype = obj;
var o = new me();
@@ -416,28 +416,28 @@ function isNumber(a) { return typeof a == 'number' && isFinite(a); }
function isFunction(a) { return typeof a == 'function'; }
function dumpObject (obj) {
- if (isUndefined(obj)) {
- return "undefined";
- } else if (isString(obj)) {
- return '"' + obj.toString() + '"'; // FIXME: escape
- } else if (isBoolean(obj) || isNumber(obj)) {
- return obj.toString();
- } else if (isArray(obj)) {
- var x = "[";
- for (var i in obj) {
- x += dumpObject(obj[i]);
- if (i < obj.length-1) {
- x += ",";
- }
- }
- return x + "]";
- } else {
- var x = "{";
- for (var y in obj) {
- x += y + "=" + dumpObject(obj[y]) + ";" ;
- }
- return x + "}";
- }
+ if (isUndefined(obj)) {
+ return "undefined";
+ } else if (isString(obj)) {
+ return '"' + obj.toString() + '"'; // FIXME: escape
+ } else if (isBoolean(obj) || isNumber(obj)) {
+ return obj.toString();
+ } else if (isArray(obj)) {
+ var x = "[";
+ for (var i in obj) {
+ x += dumpObject(obj[i]);
+ if (i < obj.length-1) {
+ x += ",";
+ }
+ }
+ return x + "]";
+ } else {
+ var x = "{";
+ for (var y in obj) {
+ x += y + "=" + dumpObject(obj[y]) + ";" ;
+ }
+ return x + "}";
+ }
}
/* ------------------------------------------------------------------------- */
@@ -447,11 +447,11 @@ function dumpObject (obj) {
GFConcrete.prototype.showRules = function () {
var ruleStr = new Array();
- ruleStr.push("");
- for (var i = 0, j = this.rules.length; i < j; i++) {
- ruleStr.push(this.rules[i].show());
- }
- return ruleStr.join("");
+ ruleStr.push("");
+ for (var i = 0, j = this.rules.length; i < j; i++) {
+ ruleStr.push(this.rules[i].show());
+ }
+ return ruleStr.join("");
};
GFConcrete.prototype.tokenize = function (string) {
var inToken = false;
@@ -460,125 +460,125 @@ GFConcrete.prototype.tokenize = function (string) {
for (var i = 0; i < string.length; i++) {
if ( string.charAt(i) == ' ' // space
- || string.charAt(i) == '\f' // form feed
- || string.charAt(i) == '\n' // newline
- || string.charAt(i) == '\r' // return
- || string.charAt(i) == '\t' // horizontal tab
- || string.charAt(i) == '\v' // vertical tab
+ || string.charAt(i) == '\f' // form feed
+ || string.charAt(i) == '\n' // newline
+ || string.charAt(i) == '\r' // return
+ || string.charAt(i) == '\t' // horizontal tab
+ || string.charAt(i) == '\v' // vertical tab
|| string.charAt(i) == String.fromCharCode(160) //
) {
- if (inToken) {
+ if (inToken) {
end = i-1;
inToken = false;
-
+
tokens.push(string.substr(start,end-start+1));
}
- } else {
+ } else {
if (!inToken) {
start = i;
inToken = true;
}
}
}
-
+
if (inToken) {
end = i-1;
inToken = false;
-
+
tokens.push(string.substr(start,end-start+1));
}
return tokens;
};
GFConcrete.prototype.parseString = function (string, cat) {
- var tokens = this.tokenize(string);
-
- var ps = new ParseState(this, cat);
- for (var i in tokens) {
- if (!ps.next(tokens[i]))
+ var tokens = this.tokenize(string);
+
+ var ps = new ParseState(this, cat);
+ for (var i in tokens) {
+ if (!ps.next(tokens[i]))
return new Array();
- }
- return ps.extractTrees();
+ }
+ return ps.extractTrees();
};
/**
* Generate list of suggestions given an input string
*/
GFConcrete.prototype.complete = function (input, cat) {
- // Parameter defaults
- if (input == null) input = "";
- if (cat == null) cat = grammar.abstract.startcat;
-
- // Tokenise input string & remove empty tokens
- tokens = input.trim().split(' ');
- for (var i = tokens.length - 1; i >= 0; i--) {
- if (tokens[i] == "") { tokens.splice(i, 1); }
- }
-
- // Capture last token as it may be partial
- current = tokens.pop();
- if (current == null) current = "";
+ // Parameter defaults
+ if (input == null) input = "";
+ if (cat == null) cat = grammar.abstract.startcat;
- // Init parse state objects.
- // ps2 is used for testing whether the final token is parsable or not.
- var ps = new ParseState(this, cat);
- var ps2 = new ParseState(this, cat);
-
- // Iterate over tokens, feed one by one to parser
- for (var i = 0; i < tokens.length ; i++) {
- if (!ps.next(tokens[i])) {
- return new Array(); // Incorrect parse, nothing to suggest
- }
- ps2.next(tokens[i]); // also consume token in ps2
- }
-
- // Attempt to also parse current, knowing it may be incomplete
- if (ps2.next(current)) {
- ps.next(current);
- tokens.push(current);
- current = "";
- }
- delete(ps2); // don't need this anymore
-
- // Parse is successful so far, now get suggestions
- var acc = ps.complete(current);
-
- // Format into just a list of strings & return
- // (I know the multiple nesting looks horrible)
- var suggs = new Array();
- if (acc.value) {
- // Iterate over all acc.value[]
- for (var v = 0; v < acc.value.length; v++) {
- // Iterate over all acc.value[].seq[]
- for (var s = 0; s < acc.value[v].seq.length; s++) {
- if (acc.value[v].seq[s].tokens == null) continue;
- // Iterate over all acc.value[].seq[].tokens
- for (var t = 0; t < acc.value[v].seq[s].tokens.length; t++) {
- suggs.push( acc.value[v].seq[s].tokens[t] );
- }
- }
- }
- }
-
- // Note: return used tokens too
- return { 'consumed' : tokens, 'suggestions' : suggs };
+ // Tokenise input string & remove empty tokens
+ tokens = input.trim().split(' ');
+ for (var i = tokens.length - 1; i >= 0; i--) {
+ if (tokens[i] == "") { tokens.splice(i, 1); }
+ }
+
+ // Capture last token as it may be partial
+ current = tokens.pop();
+ if (current == null) current = "";
+
+ // Init parse state objects.
+ // ps2 is used for testing whether the final token is parsable or not.
+ var ps = new ParseState(this, cat);
+ var ps2 = new ParseState(this, cat);
+
+ // Iterate over tokens, feed one by one to parser
+ for (var i = 0; i < tokens.length ; i++) {
+ if (!ps.next(tokens[i])) {
+ return new Array(); // Incorrect parse, nothing to suggest
+ }
+ ps2.next(tokens[i]); // also consume token in ps2
+ }
+
+ // Attempt to also parse current, knowing it may be incomplete
+ if (ps2.next(current)) {
+ ps.next(current);
+ tokens.push(current);
+ current = "";
+ }
+ delete(ps2); // don't need this anymore
+
+ // Parse is successful so far, now get suggestions
+ var acc = ps.complete(current);
+
+ // Format into just a list of strings & return
+ // (I know the multiple nesting looks horrible)
+ var suggs = new Array();
+ if (acc.value) {
+ // Iterate over all acc.value[]
+ for (var v = 0; v < acc.value.length; v++) {
+ // Iterate over all acc.value[].seq[]
+ for (var s = 0; s < acc.value[v].seq.length; s++) {
+ if (acc.value[v].seq[s].tokens == null) continue;
+ // Iterate over all acc.value[].seq[].tokens
+ for (var t = 0; t < acc.value[v].seq[s].tokens.length; t++) {
+ suggs.push( acc.value[v].seq[s].tokens[t] );
+ }
+ }
+ }
+ }
+
+ // Note: return used tokens too
+ return { 'consumed' : tokens, 'suggestions' : suggs };
}
// Apply Object Definition
function Apply(fun, args) {
- this.id = "Apply";
- this.fun = fun;
- this.args = args;
+ this.id = "Apply";
+ this.fun = fun;
+ this.args = args;
}
Apply.prototype.show = function (cat) {
- var recStr = new Array();
- recStr.push(cat, " -> ", fun.name, " [", this.args, "]");
- return recStr.join("");
+ var recStr = new Array();
+ recStr.push(cat, " -> ", fun.name, " [", this.args, "]");
+ return recStr.join("");
};
Apply.prototype.isEqual = function (obj) {
- if (this.id != obj.id || this.fun != obj.fun || this.args.length != obj.args.length)
+ if (this.id != obj.id || this.fun != obj.fun || this.args.length != obj.args.length)
return false;
-
+
for (var i in this.args) {
if (this.args[i] != obj.args[i])
return false;
@@ -588,39 +588,39 @@ Apply.prototype.isEqual = function (obj) {
};
function PArg() {
- this.fid = arguments[arguments.length-1];
- if (arguments.length > 1)
- this.hypos = arguments.slice(0,arguments.length-1);
+ this.fid = arguments[arguments.length-1];
+ if (arguments.length > 1)
+ this.hypos = arguments.slice(0,arguments.length-1);
}
// Coerce Object Definition
function Coerce(arg) {
this.id = "Coerce";
- this.arg = arg;
+ this.arg = arg;
}
Coerce.prototype.show = function (cat) {
- var recStr = new Array();
- recStr.push(cat, " -> _ [", this.args, "]");
- return recStr.join("");
+ var recStr = new Array();
+ recStr.push(cat, " -> _ [", this.args, "]");
+ return recStr.join("");
};
// Const Object Definition
function Const(lit, toks) {
this.id = "Const";
- this.lit = lit;
- this.toks = toks;
+ this.lit = lit;
+ this.toks = toks;
}
Const.prototype.show = function (cat) {
- var recStr = new Array();
- recStr.push(cat, " -> ", lit.print());
- return recStr.join("");
+ var recStr = new Array();
+ recStr.push(cat, " -> ", lit.print());
+ return recStr.join("");
};
Const.prototype.isEqual = function (obj) {
- if (this.id != obj.id || this.lit.isEqual(obj.lit) || this.toks.length != obj.toks.length)
+ if (this.id != obj.id || this.lit.isEqual(obj.lit) || this.toks.length != obj.toks.length)
return false;
-
+
for (var i in this.toks) {
if (this.toks[i] != obj.toks[i])
return false;
@@ -638,41 +638,41 @@ function CncFun(name,lins) {
// Object to represent argument projections in grammar rules
function SymCat(i, label) {
- this.id = "Arg";
- this.i = i;
- this.label = label;
+ this.id = "Arg";
+ this.i = i;
+ this.label = label;
}
SymCat.prototype.getId = function () { return this.id; };
SymCat.prototype.getArgNum = function () { return this.i };
SymCat.prototype.show = function () {
- var argStr = new Array();
- argStr.push(this.i, this.label);
- return argStr.join(".");
+ var argStr = new Array();
+ argStr.push(this.i, this.label);
+ return argStr.join(".");
};
// Object to represent terminals in grammar rules
function SymKS() {
- this.id = "KS";
- this.tokens = arguments;
+ this.id = "KS";
+ this.tokens = arguments;
}
SymKS.prototype.getId = function () { return this.id; };
SymKS.prototype.show = function () {
- var terminalStr = new Array();
- terminalStr.push('"', this.tokens, '"');
- return terminalStr.join("");
+ var terminalStr = new Array();
+ terminalStr.push('"', this.tokens, '"');
+ return terminalStr.join("");
};
// Object to represent pre in grammar rules
function SymKP(tokens,alts) {
- this.id = "KP";
- this.tokens = tokens;
+ this.id = "KP";
+ this.tokens = tokens;
this.alts = alts;
}
SymKP.prototype.getId = function () { return this.id; };
SymKP.prototype.show = function () {
- var terminalStr = new Array();
- terminalStr.push('"', this.tokens, '"');
- return terminalStr.join("");
+ var terminalStr = new Array();
+ terminalStr.push('"', this.tokens, '"');
+ return terminalStr.join("");
};
function Alt(tokens, prefixes) {
@@ -682,15 +682,15 @@ function Alt(tokens, prefixes) {
// Object to represent pre in grammar rules
function SymLit(i,label) {
- this.id = "Lit";
- this.i = i;
- this.label = label;
+ this.id = "Lit";
+ this.i = i;
+ this.label = label;
}
SymLit.prototype.getId = function () { return this.id; };
SymLit.prototype.show = function () {
- var argStr = new Array();
- argStr.push(this.i, this.label);
- return argStr.join(".");
+ var argStr = new Array();
+ argStr.push(this.i, this.label);
+ return argStr.join(".");
};
// Parsing
@@ -732,11 +732,11 @@ Trie.prototype.lookup = function(key,obj) {
Trie.prototype.isEmpty = function() {
if (this.value != null)
return false;
-
+
for (var i in this.items) {
return false;
}
-
+
return true;
}
@@ -747,7 +747,7 @@ function ParseState(concrete, startCat) {
this.chart = new Chart(concrete);
var items = new Array();
-
+
var fids = concrete.startCats[startCat];
if (fids != null) {
var fid;
@@ -762,7 +762,7 @@ function ParseState(concrete, startCat) {
}
}
}
-
+
this.items.insertChain(new Array(), items);
}
ParseState.prototype.next = function (token) {
@@ -785,7 +785,7 @@ ParseState.prototype.next = function (token) {
else
return null;
}
-
+
return null;
}
, function (tokens, item) {
@@ -802,7 +802,7 @@ ParseState.prototype.next = function (token) {
this.items = acc;
this.chart.shift();
-
+
return !this.items.isEmpty();
}
/**
@@ -812,35 +812,35 @@ ParseState.prototype.next = function (token) {
*/
ParseState.prototype.complete = function (currentToken) {
- // Initialise accumulator for suggestions
- var acc = this.items.lookup(currentToken);
- if (acc == null)
- acc = new Trie();
-
- this.process(
- // Items
- this.items.value,
-
- // Deal with literal categories
- function (fid) {
- // Always return null, as suggested by Krasimir
- return null;
- },
-
- // Takes an array of tokens and populates the accumulator
- function (tokens, item) {
- if (currentToken == "" || tokens[0].indexOf(currentToken) == 0) { //if begins with...
- var tokens1 = new Array();
- for (var i = 1; i < tokens.length; i++) {
- tokens1[i-1] = tokens[i];
- }
- acc.insertChain1(tokens1, item);
- }
- }
- );
-
- // Return matches
- return acc;
+ // Initialise accumulator for suggestions
+ var acc = this.items.lookup(currentToken);
+ if (acc == null)
+ acc = new Trie();
+
+ this.process(
+ // Items
+ this.items.value,
+
+ // Deal with literal categories
+ function (fid) {
+ // Always return null, as suggested by Krasimir
+ return null;
+ },
+
+ // Takes an array of tokens and populates the accumulator
+ function (tokens, item) {
+ if (currentToken == "" || tokens[0].indexOf(currentToken) == 0) { //if begins with...
+ var tokens1 = new Array();
+ for (var i = 1; i < tokens.length; i++) {
+ tokens1[i-1] = tokens[i];
+ }
+ acc.insertChain1(tokens1, item);
+ }
+ }
+ );
+
+ // Return matches
+ return acc;
}
ParseState.prototype.extractTrees = function() {
this.process( this.items.value
@@ -850,11 +850,11 @@ ParseState.prototype.extractTrees = function() {
, function (tokens, item) {
}
);
-
-
+
+
var totalFIds = this.concrete.totalFIds;
var forest = this.chart.forest;
-
+
function go(fid) {
if (fid < totalFIds) {
return [new Fun("?")];
@@ -864,24 +864,24 @@ ParseState.prototype.extractTrees = function() {
var rules = forest[fid];
for (var j in rules) {
var rule = rules[j];
-
+
if (rule.id == "Const") {
trees.push(rule.lit);
- } else {
+ } else {
var arg_ix = new Array();
var arg_ts = new Array();
for (var k in rule.args) {
arg_ix[k] = 0;
arg_ts[k] = go(rule.args[k].fid);
}
-
+
while (true) {
var t = new Fun(rule.fun.name);
for (var k in arg_ts) {
t.setArg(k,arg_ts[k][arg_ix[k]]);
}
trees.push(t);
-
+
var i = 0;
while (i < arg_ts.length) {
arg_ix[i]++;
@@ -889,26 +889,26 @@ ParseState.prototype.extractTrees = function() {
break;
arg_ix[i] = 0;
- i++;
+ i++;
}
-
+
if (i >= arg_ts.length)
break;
}
}
}
-
+
return trees;
}
}
-
+
var trees = new Array();
var fids = this.concrete.startCats[this.startCat];
if (fids != null) {
var fid0;
for (fid0 = fids.s; fid0 <= fids.e; fid0++) {
-
+
var labels = new Object();
var rules = this.chart.expandForest(fid0);
for (var i in rules) {
@@ -916,7 +916,7 @@ ParseState.prototype.extractTrees = function() {
labels[lbl] = true;
}
}
-
+
for (var lbl in labels) {
var fid = this.chart.lookupPC(fid0,lbl,0);
var arg_ts = go(fid);
@@ -928,14 +928,14 @@ ParseState.prototype.extractTrees = function() {
break;
}
}
-
+
if (!isMember)
trees.push(arg_ts[i]);
}
}
}
- }
-
+ }
+
return trees;
}
ParseState.prototype.process = function (agenda,literalCallback,tokenCallback) {
@@ -966,10 +966,10 @@ ParseState.prototype.process = function (agenda,literalCallback,tokenCallback) {
break;
}
}
-
+
if (!isMember) {
items.push(item);
-
+
var fid2 = this.chart.lookupPC(fid,label,this.chart.offset);
if (fid2 != null) {
agenda.push(item.shiftOverArg(sym.i,fid2));
@@ -1004,7 +1004,7 @@ ParseState.prototype.process = function (agenda,literalCallback,tokenCallback) {
var fid = this.chart.lookupPC(item.fid,item.lbl,item.offset);
if (fid == null) {
fid = this.chart.nextId++;
-
+
var items = this.chart.lookupACo(item.offset,item.fid,item.lbl);
if (items != null) {
for (var j in items) {
@@ -1013,7 +1013,7 @@ ParseState.prototype.process = function (agenda,literalCallback,tokenCallback) {
agenda.push(pitem.shiftOverArg(i,fid));
}
}
-
+
this.chart.insertPC(item.fid,item.lbl,item.offset,fid);
this.chart.forest[fid] = [new Apply(item.fun,item.args)];
} else {
@@ -1023,16 +1023,16 @@ ParseState.prototype.process = function (agenda,literalCallback,tokenCallback) {
agenda.push(new ActiveItem(this.chart.offset,0,item.fun,item.fun.lins[lbl],item.args,fid,lbl));
}
}
-
+
var rules = this.chart.forest[fid];
var rule = new Apply(item.fun,item.args);
-
+
var isMember = false;
for (var j in rules) {
if (rules[j].isEqual(rule))
isMember = true;
}
-
+
if (!isMember)
rules.push(rule);
}
@@ -1048,7 +1048,7 @@ function Chart(concrete) {
this.forest = new Object();
this.nextId = concrete.totalFIds;
this.offset = 0;
-
+
for (var fid in concrete.pproductions) {
this.forest[fid] = concrete.pproductions[fid];
}
@@ -1061,7 +1061,7 @@ Chart.prototype.lookupAC = function (fid,label) {
}
Chart.prototype.lookupACo = function (offset,fid,label) {
var tmp;
-
+
if (offset == this.offset)
tmp = this.active[fid];
else
@@ -1094,15 +1094,15 @@ Chart.prototype.insertPC = function (fid1,label,offset,fid2) {
Chart.prototype.shift = function () {
this.actives.push(this.active);
this.active = new Object();
-
+
this.passive = new Object();
-
+
this.offset++;
}
Chart.prototype.expandForest = function (fid) {
var rules = new Array();
var forest = this.forest;
-
+
var go = function (rules0) {
for (var i in rules0) {
var rule = rules0[i];
diff --git a/src/runtime/javascript/grammar.js b/src/runtime/javascript/grammar.js
index 9e246db50..c6b2bb989 100644
--- a/src/runtime/javascript/grammar.js
+++ b/src/runtime/javascript/grammar.js
@@ -1 +1 @@
-var Foods = new GFGrammar(new GFAbstract("Phrase",{Boring: new Type([], "Quality"), Cheese: new Type([], "Kind"), Delicious: new Type([], "Quality"), Expensive: new Type([], "Quality"), Fish: new Type([], "Kind"), Fresh: new Type([], "Quality"), Is: new Type(["Item", "Quality"], "Phrase"), Italian: new Type([], "Quality"), Pizza: new Type([], "Kind"), QKind: new Type(["Quality", "Kind"], "Kind"), That: new Type(["Kind"], "Item"), These: new Type(["Kind"], "Item"), This: new Type(["Kind"], "Item"), Those: new Type(["Kind"], "Item"), Very: new Type(["Quality"], "Quality"), Warm: new Type([], "Quality"), Wine: new Type([], "Kind")}),{FoodsEng: new GFConcrete({},{0:[new Apply(15,[new PArg(2)]), new Apply(17,[new PArg(2)])], 1:[new Apply(16,[new PArg(2)]), new Apply(18,[new PArg(2)])], 2:[new Apply(5,[]), new Apply(8,[]), new Apply(13,[]), new Apply(14,[new PArg(4), new PArg(2)]), new Apply(21,[])], 3:[new Apply(10,[new PArg(0), new PArg(4)]), new Apply(11,[new PArg(1), new PArg(4)])], 4:[new Apply(4,[]), new Apply(6,[]), new Apply(7,[]), new Apply(9,[]), new Apply(12,[]), new Apply(19,[new PArg(4)]), new Apply(20,[])]},[new CncFun("lindef Item",[0]), new CncFun("lindef Kind",[0, 0]), new CncFun("lindef Phrase",[0]), new CncFun("lindef Quality",[0]), new CncFun("Boring",[1]), new CncFun("Cheese",[2, 3]), new CncFun("Delicious",[4]), new CncFun("Expensive",[5]), new CncFun("Fish",[6, 6]), new CncFun("Fresh",[7]), new CncFun("Is",[8]), new CncFun("Is",[9]), new CncFun("Italian",[10]), new CncFun("Pizza",[11, 12]), new CncFun("QKind",[13, 14]), new CncFun("That",[15]), new CncFun("These",[16]), new CncFun("This",[17]), new CncFun("Those",[18]), new CncFun("Very",[19]), new CncFun("Warm",[20]), new CncFun("Wine",[21, 22])],[[new SymLit(0, 0)],[new SymKS("boring")],[new SymKS("cheese")],[new SymKS("cheeses")],[new SymKS("delicious")],[new SymKS("expensive")],[new SymKS("fish")],[new SymKS("fresh")],[new SymCat(0, 0), new SymKS("is"), new SymCat(1, 0)],[new SymCat(0, 0), new 
SymKS("are"), new SymCat(1, 0)],[new SymKS("Italian")],[new SymKS("pizza")],[new SymKS("pizzas")],[new SymCat(0, 0), new SymCat(1, 0)],[new SymCat(0, 0), new SymCat(1, 1)],[new SymKS("that"), new SymCat(0, 0)],[new SymKS("these"), new SymCat(0, 1)],[new SymKS("this"), new SymCat(0, 0)],[new SymKS("those"), new SymCat(0, 1)],[new SymKS("very"), new SymCat(0, 0)],[new SymKS("warm")],[new SymKS("wine")],[new SymKS("wines")]],{Float:{s: -3, e: -3}, Int:{s: -2, e: -2}, Item:{s: 0, e: 1}, Kind:{s: 2, e: 2}, Phrase:{s: 3, e: 3}, Quality:{s: 4, e: 4}, String:{s: -1, e: -1}, __gfVar:{s: -4, e: -4}}, 6)});
+var Foods = new GFGrammar(new GFAbstract("Phrase",{Boring: new Type([], "Quality"), Cheese: new Type([], "Kind"), Delicious: new Type([], "Quality"), Expensive: new Type([], "Quality"), Fish: new Type([], "Kind"), Fresh: new Type([], "Quality"), Is: new Type(["Item", "Quality"], "Phrase"), Italian: new Type([], "Quality"), Pizza: new Type([], "Kind"), QKind: new Type(["Quality", "Kind"], "Kind"), That: new Type(["Kind"], "Item"), These: new Type(["Kind"], "Item"), This: new Type(["Kind"], "Item"), Those: new Type(["Kind"], "Item"), Very: new Type(["Quality"], "Quality"), Warm: new Type([], "Quality"), Wine: new Type([], "Kind")}),{FoodsEng: new GFConcrete({},{0:[new Apply(19,[new PArg(2)]), new Apply(21,[new PArg(2)])], 1:[new Apply(20,[new PArg(2)]), new Apply(22,[new PArg(2)])], 2:[new Apply(9,[]), new Apply(12,[]), new Apply(17,[]), new Apply(18,[new PArg(4), new PArg(2)]), new Apply(25,[])], 3:[new Apply(14,[new PArg(0), new PArg(4)]), new Apply(15,[new PArg(1), new PArg(4)])], 4:[new Apply(8,[]), new Apply(10,[]), new Apply(11,[]), new Apply(13,[]), new Apply(16,[]), new Apply(23,[new PArg(4)]), new Apply(24,[])]},[new CncFun("'lindef Item'",[5]), new CncFun("'lindef Item'",[0]), new CncFun("'lindef Kind'",[5, 5]), new CncFun("'lindef Kind'",[0]), new CncFun("'lindef Phrase'",[5]), new CncFun("'lindef Phrase'",[0]), new CncFun("'lindef Quality'",[5]), new CncFun("'lindef Quality'",[0]), new CncFun("Boring",[7]), new CncFun("Cheese",[8, 9]), new CncFun("Delicious",[10]), new CncFun("Expensive",[11]), new CncFun("Fish",[12, 12]), new CncFun("Fresh",[13]), new CncFun("Is",[4]), new CncFun("Is",[3]), new CncFun("Italian",[6]), new CncFun("Pizza",[14, 15]), new CncFun("QKind",[1, 2]), new CncFun("That",[16]), new CncFun("These",[17]), new CncFun("This",[18]), new CncFun("Those",[19]), new CncFun("Very",[20]), new CncFun("Warm",[21]), new CncFun("Wine",[22, 23])],[[new SymCat(0, 0)],[new SymCat(0, 0), new SymCat(1, 0)],[new SymCat(0, 0), new SymCat(1, 1)],[new 
SymCat(0, 0), new SymKS("are"), new SymCat(1, 0)],[new SymCat(0, 0), new SymKS("is"), new SymCat(1, 0)],[new SymLit(0, 0)],[new SymKS("Italian")],[new SymKS("boring")],[new SymKS("cheese")],[new SymKS("cheeses")],[new SymKS("delicious")],[new SymKS("expensive")],[new SymKS("fish")],[new SymKS("fresh")],[new SymKS("pizza")],[new SymKS("pizzas")],[new SymKS("that"), new SymCat(0, 0)],[new SymKS("these"), new SymCat(0, 1)],[new SymKS("this"), new SymCat(0, 0)],[new SymKS("those"), new SymCat(0, 1)],[new SymKS("very"), new SymCat(0, 0)],[new SymKS("warm")],[new SymKS("wine")],[new SymKS("wines")]],{Float:{s: -3, e: -3}, Int:{s: -2, e: -2}, Item:{s: 0, e: 1}, Kind:{s: 2, e: 2}, Phrase:{s: 3, e: 3}, Quality:{s: 4, e: 4}, String:{s: -1, e: -1}}, 5), FoodsIta: new GFConcrete({},{0:[new Apply(22,[new PArg(4)]), new Apply(26,[new PArg(4)])], 1:[new Apply(23,[new PArg(5)]), new Apply(27,[new PArg(5)])], 2:[new Apply(24,[new PArg(4)]), new Apply(28,[new PArg(4)])], 3:[new Apply(25,[new PArg(5)]), new Apply(29,[new PArg(5)])], 4:[new Apply(9,[]), new Apply(12,[]), new Apply(20,[new PArg(7), new PArg(4)]), new Apply(32,[])], 5:[new Apply(19,[]), new Apply(21,[new PArg(7), new PArg(5)])], 6:[new Apply(14,[new PArg(0), new PArg(7)]), new Apply(15,[new PArg(1), new PArg(7)]), new Apply(16,[new PArg(2), new PArg(7)]), new Apply(17,[new PArg(3), new PArg(7)])], 7:[new Apply(8,[]), new Apply(10,[]), new Apply(11,[]), new Apply(13,[]), new Apply(18,[]), new Apply(30,[new PArg(7)]), new Apply(31,[])]},[new CncFun("'lindef Item'",[9]), new CncFun("'lindef Item'",[0]), new CncFun("'lindef Kind'",[9, 9]), new CncFun("'lindef Kind'",[0]), new CncFun("'lindef Phrase'",[9]), new CncFun("'lindef Phrase'",[0]), new CncFun("'lindef Quality'",[9, 9, 9, 9]), new CncFun("'lindef Quality'",[0]), new CncFun("Boring",[39, 38, 36, 37]), new CncFun("Cheese",[23, 22]), new CncFun("Delicious",[21, 20, 18, 19]), new CncFun("Expensive",[17, 16, 14, 15]), new CncFun("Fish",[40, 41]), new CncFun("Fresh",[27, 
26, 24, 25]), new CncFun("Is",[3]), new CncFun("Is",[4]), new CncFun("Is",[1]), new CncFun("Is",[2]), new CncFun("Italian",[31, 30, 28, 29]), new CncFun("Pizza",[42, 43]), new CncFun("QKind",[5, 7]), new CncFun("QKind",[6, 8]), new CncFun("That",[45]), new CncFun("That",[46]), new CncFun("These",[50]), new CncFun("These",[49]), new CncFun("This",[51]), new CncFun("This",[48]), new CncFun("Those",[44]), new CncFun("Those",[47]), new CncFun("Very",[32, 33, 34, 35]), new CncFun("Warm",[13, 12, 10, 11]), new CncFun("Wine",[53, 52])],[[new SymCat(0, 0)],[new SymCat(0, 0), new SymKS("sono"), new SymCat(1, 1)],[new SymCat(0, 0), new SymKS("sono"), new SymCat(1, 3)],[new SymCat(0, 0), new SymKS("è"), new SymCat(1, 0)],[new SymCat(0, 0), new SymKS("è"), new SymCat(1, 2)],[new SymCat(1, 0), new SymCat(0, 0)],[new SymCat(1, 0), new SymCat(0, 2)],[new SymCat(1, 1), new SymCat(0, 1)],[new SymCat(1, 1), new SymCat(0, 3)],[new SymLit(0, 0)],[new SymKS("calda")],[new SymKS("calde")],[new SymKS("caldi")],[new SymKS("caldo")],[new SymKS("cara")],[new SymKS("care")],[new SymKS("cari")],[new SymKS("caro")],[new SymKS("deliziosa")],[new SymKS("deliziose")],[new SymKS("deliziosi")],[new SymKS("delizioso")],[new SymKS("formaggi")],[new SymKS("formaggio")],[new SymKS("fresca")],[new SymKS("fresche")],[new SymKS("freschi")],[new SymKS("fresco")],[new SymKS("italiana")],[new SymKS("italiane")],[new SymKS("italiani")],[new SymKS("italiano")],[new SymKS("molto"), new SymCat(0, 0)],[new SymKS("molto"), new SymCat(0, 1)],[new SymKS("molto"), new SymCat(0, 2)],[new SymKS("molto"), new SymCat(0, 3)],[new SymKS("noiosa")],[new SymKS("noiose")],[new SymKS("noiosi")],[new SymKS("noioso")],[new SymKS("pesce")],[new SymKS("pesci")],[new SymKS("pizza")],[new SymKS("pizze")],[new SymKS("quei"), new SymCat(0, 1)],[new SymKS("quel"), new SymCat(0, 0)],[new SymKS("quella"), new SymCat(0, 0)],[new SymKS("quelle"), new SymCat(0, 1)],[new SymKS("questa"), new SymCat(0, 0)],[new SymKS("queste"), new SymCat(0, 
1)],[new SymKS("questi"), new SymCat(0, 1)],[new SymKS("questo"), new SymCat(0, 0)],[new SymKS("vini")],[new SymKS("vino")]],{Float:{s: -3, e: -3}, Int:{s: -2, e: -2}, Item:{s: 0, e: 3}, Kind:{s: 4, e: 5}, Phrase:{s: 6, e: 6}, Quality:{s: 7, e: 7}, String:{s: -1, e: -1}}, 8)});
diff --git a/src/runtime/javascript/translator.html b/src/runtime/javascript/translator.html
index d6b4ee3c1..b2c926550 100644
--- a/src/runtime/javascript/translator.html
+++ b/src/runtime/javascript/translator.html
@@ -32,7 +32,7 @@
Web-based GF Translator
-
+