1
0
forked from GitHub/gf-core

removed GF/deprecated

This commit is contained in:
aarne
2010-12-22 14:11:55 +00:00
parent ce15ec7b78
commit c5b9637695
726 changed files with 0 additions and 180266 deletions

View File

@@ -1,260 +0,0 @@
Code map for GF source files.
$Author: peb $
$Date: 2005/02/07 10:58:08 $
Directories:
[top level] GF main function and runtime-related modules
api high-level access to GF functionalities
canonical GFC (= GF Canonical) basic functionalities
cf context-free skeleton used in parsing
cfgm multilingual context-free skeleton exported to Java
compile compilation phases from GF to GFC
conversions [OBSOLETE] formats used in parser generation
for-ghc GHC-specific files (Glasgow Haskell Compiler)
for-hugs Hugs-specific files (a Haskell interpreter)
for-windows Windows-specific files (an operating system from Microsoft)
grammar basic functionalities of GF grammars used in compilation
infra GF-independent infrastructure and auxiliaries
newparsing parsing with GF grammars: current version (cf. parsing)
notrace debugging utilities for parser development (cf. trace)
parsers parsers of GF and GFC files
parsing [OBSOLETE] parsing with GF grammars: old version (cf. newparsing)
shell interaction shells
source utilities for reading in GF source files
speech generation of speech recognition grammars
trace debugging utilities for parser development (cf. notrace)
useGrammar grammar functionalities for applications
util utilities for using GF
Individual files:
GF.hs the Main module
GFModes.hs
HelpFile.hs [AUTO] help file generated by util/MkHelpFile
Today.hs [AUTO] file generated by "make today"
api/API.hs high-level access to GF functionalities
api/BatchTranslate.hs
api/GetMyTree.hs
api/GrammarToHaskell.hs
api/IOGrammar.hs
api/MyParser.hs slot for defining your own parser
canonical/AbsGFC.hs [AUTO] abstract syntax of GFC
canonical/CanonToGrammar.hs
canonical/CMacros.hs
canonical/ErrM.hs
canonical/GetGFC.hs
canonical/GFC.cf [LBNF] source of GFC parser
canonical/GFC.hs
canonical/LexGFC.hs
canonical/Look.hs
canonical/MkGFC.hs
canonical/PrExp.hs
canonical/PrintGFC.hs pretty-printer of GFC
canonical/Share.hs
canonical/SkelGFC.hs [AUTO]
canonical/TestGFC.hs [AUTO]
canonical/Unlex.hs
cf/CanonToCF.hs
cf/CF.hs abstract syntax of context-free grammars
cf/CFIdent.hs
cf/CFtoGrammar.hs
cf/CFtoSRG.hs
cf/ChartParser.hs the current default parsing method
cf/EBNF.hs
cf/PPrCF.hs
cf/PrLBNF.hs
cf/Profile.hs
cfgm/CFG.cf [LBNF] source
cfgm/AbsCFG.hs [AUTO]
cfgm/LexCFG.hs [AUTO]
cfgm/ParCFG.hs [AUTO]
cfgm/PrintCFG.hs [AUTO]
cfgm/PrintCFGrammar.hs
compile/CheckGrammar.hs
compile/Compile.hs the complete compiler pipeline
compile/Extend.hs
compile/GetGrammar.hs
compile/GrammarToCanon.hs
compile/MkResource.hs
compile/MkUnion.hs
compile/ModDeps.hs
compile/Optimize.hs
compile/PGrammar.hs
compile/PrOld.hs
compile/Rebuild.hs
compile/RemoveLiT.hs
compile/Rename.hs
compile/ShellState.hs the run-time multilingual grammar datastructure
compile/Update.hs
for-ghc/ArchEdit.hs
for-ghc/Arch.hs
for-ghc-nofud/ArchEdit.hs@
for-ghc-nofud/Arch.hs@
for-hugs/ArchEdit.hs
for-hugs/Arch.hs
for-hugs/JGF.hs
for-hugs/MoreCustom.hs
for-hugs/Unicode.hs
for-hugs/Arch.hs
for-hugs/ArchEdit.hs
for-hugs/JGF.hs
for-hugs/LexCFG.hs dummy CFG lexer
for-hugs/LexGF.hs dummy GF lexer
for-hugs/LexGFC.hs dummy GFC lexer
for-hugs/MoreCustom.hs
for-hugs/ParCFG.hs dummy CFG parser
for-hugs/ParGFC.hs dummy GFC parser
for-hugs/ParGF.hs dummy GF parser
for-hugs/Tracing.hs
for-hugs/Unicode.hs
for-windows/ArchEdit.hs
for-windows/Arch.hs
grammar/AbsCompute.hs
grammar/Abstract.hs GF and GFC abstract syntax datatypes
grammar/AppPredefined.hs
grammar/Compute.hs
grammar/Grammar.hs GF source grammar datatypes
grammar/LookAbs.hs
grammar/Lookup.hs
grammar/Macros.hs macros for creating GF terms and types
grammar/MMacros.hs more macros, mainly for abstract syntax
grammar/PatternMatch.hs
grammar/PrGrammar.hs the top-level grammar printer
grammar/Refresh.hs
grammar/ReservedWords.hs
grammar/TC.hs Coquand's type checking engine
grammar/TypeCheck.hs
grammar/Unify.hs
grammar/Values.hs
infra/Arabic.hs ASCII coding of Arabic Unicode
infra/Assoc.hs finite maps/association lists as binary search trees
infra/CheckM.hs
infra/Comments.hs
infra/Devanagari.hs ASCII coding of Devanagari Unicode
infra/ErrM.hs
infra/Ethiopic.hs
infra/EventF.hs
infra/ExtendedArabic.hs
infra/ExtraDiacritics.hs
infra/FudgetOps.hs
infra/Glue.hs
infra/Greek.hs
infra/Hebrew.hs
infra/Hiragana.hs
infra/Ident.hs
infra/LatinASupplement.hs
infra/Map.hs finite maps as red black trees
infra/Modules.hs
infra/OCSCyrillic.hs
infra/Operations.hs library of strings, search trees, error monads
infra/Option.hs
infra/OrdMap2.hs abstract class of finite maps + implementation as association lists
infra/OrdSet.hs abstract class of sets + implementation as sorted lists
infra/Parsers.hs
infra/ReadFiles.hs
infra/RedBlack.hs red black trees
infra/RedBlackSet.hs sets and maps as red black trees
infra/Russian.hs
infra/SortedList.hs sets as sorted lists
infra/Str.hs
infra/Tamil.hs
infra/Text.hs
infra/Trie2.hs
infra/Trie.hs
infra/UnicodeF.hs
infra/Unicode.hs
infra/UseIO.hs
infra/UTF8.hs UTF-8 en/decoding
infra/Zipper.hs
newparsing/CFGrammar.hs type definitions for context-free grammars
newparsing/CFParserGeneral.hs several variants of general CFG chart parsing
newparsing/CFParserIncremental.hs several variants of incremental (Earley-style) CFG chart parsing
newparsing/ConvertGFCtoMCFG.hs converting GFC to MCFG
newparsing/ConvertGrammar.hs conversions between different grammar formats
newparsing/ConvertMCFGtoCFG.hs converting MCFG to CFG
newparsing/GeneralChart.hs Haskell framework for "parsing as deduction"
newparsing/GrammarTypes.hs instantiations of grammar types
newparsing/IncrementalChart.hs Haskell framework for incremental chart parsing
newparsing/MCFGrammar.hs type definitions for multiple CFG
newparsing/MCFParserBasic.hs MCFG chart parser
newparsing/MCFRange.hs ranges for MCFG parsing
newparsing/ParseCFG.hs parsing of CFG
newparsing/ParseCF.hs parsing of the CF format
newparsing/ParseGFC.hs parsing of GFC
newparsing/ParseMCFG.hs parsing of MCFG
newparsing/Parser.hs general definitions for parsers
newparsing/PrintParser.hs pretty-printing class for parsers
newparsing/PrintSimplifiedTerm.hs simplified pretty-printing for GFC terms
notrace/Tracing.hs tracing predicates when we DON'T want tracing capabilities (normal case)
parsers/ParGFC.hs [AUTO]
parsers/ParGF.hs [AUTO]
shell/CommandF.hs
shell/CommandL.hs line-based syntax of editor commands
shell/Commands.hs commands of GF editor shell
shell/IDE.hs
shell/JGF.hs
shell/PShell.hs
shell/ShellCommands.hs commands of GF main shell
shell/Shell.hs
shell/SubShell.hs
shell/TeachYourself.hs
source/AbsGF.hs [AUTO]
source/ErrM.hs
source/GF.cf [LBNF] source of GF parser
source/GrammarToSource.hs
source/LexGF.hs [AUTO]
source/PrintGF.hs [AUTO]
source/SourceToGrammar.hs
speech/PrGSL.hs
speech/PrJSGF.hs
speech/SRG.hs
speech/TransformCFG.hs
trace/Tracing.hs tracing predicates when we want tracing capabilities
translate/GFT.hs Main module of html-producing batch translator
useGrammar/Custom.hs database for customizable commands
useGrammar/Editing.hs
useGrammar/Generate.hs
useGrammar/GetTree.hs
useGrammar/Information.hs
useGrammar/Linear.hs the linearization algorithm
useGrammar/MoreCustom.hs
useGrammar/Morphology.hs
useGrammar/Paraphrases.hs
useGrammar/Parsing.hs the top-level parsing algorithm
useGrammar/Randomized.hs
useGrammar/RealMoreCustom.hs
useGrammar/Session.hs
useGrammar/TeachYourself.hs
useGrammar/Tokenize.hs lexer definitions (listed in Custom)
useGrammar/Transfer.hs
util/GFDoc.hs utility for producing LaTeX and HTML from GF
util/HelpFile source of ../HelpFile.hs
util/Htmls.hs utility for chopping a HTML document to slides
util/MkHelpFile.hs
util/WriteF.hs

File diff suppressed because it is too large Load Diff

View File

@@ -1,693 +0,0 @@
-- GF help file updated for GF 2.6, 17/6/2006.
-- *: Commands and options marked with * are currently not implemented.
--
-- Each command has a long and a short name, options, and zero or more
-- arguments. Commands are sorted by functionality. The short name is
-- given first.
-- Type "h -all" for full help file, "h <CommandName>" for full help on a command.
-- commands that change the state
i, import: i File
Reads a grammar from File and compiles it into a GF runtime grammar.
Files "include"d in File are read recursively, nubbing repetitions.
If a grammar with the same language name is already in the state,
it is overwritten - but only if compilation succeeds.
The grammar parser depends on the file name suffix:
.gf normal GF source
.gfc canonical GF
.gfr precompiled GF resource
.gfcm multilingual canonical GF
.gfe example-based grammar files (only with the -ex option)
.gfwl multilingual word list (preprocessed to abs + cncs)
.ebnf Extended BNF format
.cf Context-free (BNF) format
.trc TransferCore format
options:
-old old: parse in GF<2.0 format (not necessary)
-v verbose: give lots of messages
-s silent: don't give error messages
-src from source: ignore precompiled gfc and gfr files
-gfc from gfc: use compiled modules whenever they exist
-retain retain operations: read resource modules (needed in comm cc)
-nocf don't build old-style context-free grammar (default without HOAS)
-docf do build old-style context-free grammar (default with HOAS)
-nocheckcirc don't eliminate circular rules from CF
-cflexer build an optimized parser with separate lexer trie
-noemit do not emit code (default with old grammar format)
-o do emit code (default with new grammar format)
-ex preprocess .gfe files if needed
-prob read probabilities from top grammar file (format --# prob Fun Double)
-treebank read a treebank file to memory (xml format)
flags:
-abs set the name used for abstract syntax (with -old option)
-cnc set the name used for concrete syntax (with -old option)
-res set the name used for resource (with -old option)
-path use the (colon-separated) search path to find modules
-optimize select an optimization to override file-defined flags
-conversion select parsing method (values strict|nondet)
-probs read probabilities from file (format (--# prob) Fun Double)
-preproc use a preprocessor on each source file
-noparse read nonparsable functions from file (format --# noparse Funs)
examples:
i English.gf -- ordinary import of Concrete
i -retain german/ParadigmsGer.gf -- import of Resource to test
r, reload: r
Executes the previous import (i) command.
rl, remove_language: rl Language
Takes away the language from the state.
e, empty: e
Takes away all languages and resets all global flags.
sf, set_flags: sf Flag*
The values of the Flags are set for Language. If no language
is specified, the flags are set globally.
examples:
sf -nocpu -- stop showing CPU time
sf -lang=Swe -- make Swe the default concrete
s, strip: s
Prune the state by removing source and resource modules.
dc, define_command: dc Name Anything
Add a new defined command. The Name must start with '%'. Later,
if 'Name X' is used, it is replaced by Anything where #1 is replaced
by X.
Restrictions: Currently at most one argument is possible, and a defined
command cannot appear in a pipe.
To see what definitions are in scope, use help -defs.
examples:
dc %tnp p -cat=NP -lang=Eng #1 | l -lang=Swe -- translate NPs
%tnp "this man" -- translate and parse
dt, define_term: dt Name Tree
Add a constant for a tree. The constant can later be called by
prefixing it with '$'.
Restriction: These terms are not yet usable as a subterm.
To see what definitions are in scope, use help -defs.
examples:
p -cat=NP "this man" | dt tm -- define tm as parse result
l -all $tm -- linearize tm in all forms
-- commands that give information about the state
pg, print_grammar: pg
Prints the actual grammar (overridden by the -lang=X flag).
The -printer=X flag sets the format in which the grammar is
written.
N.B. since grammars are compiled when imported, this command
generally does not show the grammar in the same format as the
source. In particular, the -printer=latex is not supported.
Use the command tg -printer=latex File to print the source
grammar in LaTeX.
options:
-utf8 apply UTF8-encoding to the grammar
flags:
-printer
-lang
-startcat -- The start category of the generated grammar.
Only supported by some grammar printers.
examples:
pg -printer=cf -- show the context-free skeleton
pm, print_multigrammar: pm
Prints the current multilingual grammar in .gfcm form.
(Automatically executes the strip command (s) before doing this.)
options:
-utf8 apply UTF8 encoding to the tokens in the grammar
-utf8id apply UTF8 encoding to the identifiers in the grammar
examples:
pm | wf Letter.gfcm -- print the grammar into the file Letter.gfcm
pm -printer=graph | wf D.dot -- then do 'dot -Tps D.dot > D.ps'
vg, visualize_graph: vg
Show the dependency graph of multilingual grammar via dot and gv.
po, print_options: po
Print what modules there are in the state. Also
prints those flag values in the current state that differ from defaults.
pl, print_languages: pl
Prints the names of currently available languages.
pi, print_info: pi Ident
Prints information on the identifier.
-- commands that execute and show the session history
eh, execute_history: eh File
Executes commands in the file.
ph, print_history: ph
Prints the commands issued during the GF session.
The result is readable by the eh command.
examples:
ph | wf foo.hist -- save the history into a file
-- linearization, parsing, translation, and computation
l, linearize: l PattList? Tree
Shows all linearization forms of Tree by the actual grammar
(which is overridden by the -lang flag).
The pattern list has the form [P, ... ,Q] where P,...,Q follow GF
syntax for patterns. All those forms are generated that match with the
pattern list. Too short lists are filled with variables in the end.
Only the -table flag is available if a pattern list is specified.
HINT: see GF language specification for the syntax of Pattern and Term.
You can also copy and paste parsing results.
options:
-struct bracketed form
-table show parameters (not compatible with -record, -all)
-record record, i.e. explicit GF concrete syntax term (not compatible with -table, -all)
-all show all forms and variants (not compatible with -record, -table)
-multi linearize to all languages (can be combined with the other options)
flags:
-lang linearize in this grammar
-number give this number of forms at most
-unlexer filter output through unlexer
examples:
l -lang=Swe -table -- show full inflection table in Swe
p, parse: p String
Shows all Trees returned for String by the actual
grammar (overridden by the -lang flag), in the category S (overridden
by the -cat flag).
options for batch input:
-lines parse each line of input separately, ignoring empty lines
-all as -lines, but also parse empty lines
-prob rank results by probability
-cut stop after first lexing result leading to parser success
-fail show strings whose parse fails prefixed by #FAIL
-ambiguous show strings that have more than one parse prefixed by #AMBIGUOUS
options for selecting parsing method:
-fcfg parse using a fast variant of MCFG (default is no HOAS in grammar)
-old parse using an overgenerating CFG (default if HOAS in grammar)
-cfg parse using a much less overgenerating CFG
-mcfg parse using an even less overgenerating MCFG
Note: the first time parsing with -cfg, -mcfg, and -fcfg may take a long time
options that only work for the -old default parsing method:
-n non-strict: tolerates morphological errors
-ign ignore unknown words when parsing
-raw return context-free terms in raw form
-v verbose: give more information if parsing fails
flags:
-cat parse in this category
-lang parse in this grammar
-lexer filter input through this lexer
-parser use this parsing strategy
-number return this many results at most
examples:
p -cat=S -mcfg "jag är gammal" -- parse an S with the MCFG
rf examples.txt | p -lines -- parse each non-empty line of the file
at, apply_transfer: at (Module.Fun | Fun)
Transfer a term using Fun from Module, or the topmost transfer
module. Transfer modules are given in the .trc format. They are
shown by the 'po' command.
flags:
-lang typecheck the result in this lang instead of default lang
examples:
p -lang=Cncdecimal "123" | at num2bin | l -- convert dec to bin
tb, tree_bank: tb
Generate a multilingual treebank from a list of trees (default) or compare
to an existing treebank.
options:
-c compare to existing xml-formatted treebank
-trees return the trees of the treebank
-all show all linearization alternatives (branches and variants)
-table show tables of linearizations with parameters
-record show linearization records
-xml wrap the treebank (or comparison results) with XML tags
-mem write the treebank in memory instead of a file TODO
examples:
gr -cat=S -number=100 | tb -xml | wf tb.xml -- random treebank into file
rf tb.xml | tb -c -- compare-test treebank from file
rf old.xml | tb -trees | tb -xml -- create new treebank from old
ut, use_treebank: ut String
Lookup a string in a treebank and return the resulting trees.
Use 'tb' to create a treebank and 'i -treebank' to read one from
a file.
options:
-assocs show all string-trees associations in the treebank
-strings show all strings in the treebank
-trees show all trees in the treebank
-raw return the lookup result as string, without typechecking it
flags:
-treebank use this treebank (instead of the latest introduced one)
examples:
ut "He adds this to that" | l -multi -- use treebank lookup as parser in translation
ut -assocs | grep "ComplV2" -- show all associations with ComplV2
tt, test_tokenizer: tt String
Show the token list sent to the parser when String is parsed.
HINT: can be useful when debugging the parser.
flags:
-lexer use this lexer
examples:
tt -lexer=codelit "2*(x + 3)" -- a favourite lexer for program code
g, grep: g String1 String2
Grep the String1 in the String2. String2 is read line by line,
and only those lines that contain String1 are returned.
flags:
-v return those lines that do not contain String1.
examples:
pg -printer=cf | grep "mother" -- show cf rules with word mother
cc, compute_concrete: cc Term
Compute a term by concrete syntax definitions. Uses the topmost
resource module (the last in listing by command po) to resolve
constant names.
N.B. You need the flag -retain when importing the grammar, if you want
the oper definitions to be retained after compilation; otherwise this
command does not expand oper constants.
N.B.' The resulting Term is not a term in the sense of abstract syntax,
and hence not a valid input to a Tree-demanding command.
flags:
-table show output in a similar readable format as 'l -table'
-res use another module than the topmost one
examples:
cc -res=ParadigmsFin (nLukko "hyppy") -- inflect "hyppy" with nLukko
so, show_operations: so Type
Show oper operations with the given value type. Uses the topmost
resource module to resolve constant names.
N.B. You need the flag -retain when importing the grammar, if you want
the oper definitions to be retained after compilation; otherwise this
command does not find any oper constants.
N.B.' The value type may not be defined in a supermodule of the
topmost resource. In that case, use appropriate qualified name.
flags:
-res use another module than the topmost one
examples:
so -res=ParadigmsFin ResourceFin.N -- show N-paradigms in ParadigmsFin
t, translate: t Lang Lang String
Parses String in Lang1 and linearizes the resulting Trees in Lang2.
flags:
-cat
-lexer
-parser
examples:
t Eng Swe -cat=S "every number is even or odd"
gr, generate_random: gr Tree?
Generates a random Tree of a given category. If a Tree
argument is given, the command completes the Tree with values to
the metavariables in the tree.
options:
-prob use probabilities (works for nondep types only)
-cf use a very fast method (works for nondep types only)
flags:
-cat generate in this category
-lang use the abstract syntax of this grammar
-number generate this number of trees (not impl. with Tree argument)
-depth use this number of search steps at most
examples:
gr -cat=Query -- generate in category Query
gr (PredVP ? (NegVG ?)) -- generate a random tree of this form
gr -cat=S -tr | l -- generate and linearize
gt, generate_trees: gt Tree?
Generates all trees up to a given depth. If the depth is large,
a small -alts is recommended. If a Tree argument is given, the
command completes the Tree with values to the metavariables in
the tree.
options:
-metas also return trees that include metavariables
-all generate all (can be infinitely many, lazily)
-lin linearize result of -all (otherwise, use pipe to linearize)
flags:
-depth generate to this depth (default 3)
-atoms take this number of atomic rules of each category (default unlimited)
-alts take this number of alternatives at each branch (default unlimited)
-cat generate in this category
-nonub don't remove duplicates (faster, not effective with -mem)
-mem use a memorizing algorithm (often faster, usually more memory-consuming)
-lang use the abstract syntax of this grammar
-number generate (at most) this number of trees (also works with -all)
-noexpand don't expand these categories (comma-separated, e.g. -noexpand=V,CN)
-doexpand only expand these categories (comma-separated, e.g. -doexpand=V,CN)
examples:
gt -depth=10 -cat=NP -- generate all NP's to depth 10
gt (PredVP ? (NegVG ?)) -- generate all trees of this form
gt -cat=S -tr | l -- generate and linearize
gt -noexpand=NP | l -mark=metacat -- the only NP is meta, linearized "?0 +NP"
gt | l | p -lines -ambiguous | grep "#AMBIGUOUS" -- show ambiguous strings
ma, morphologically_analyse: ma String
Runs morphological analysis on each word in String and displays
the results line by line.
options:
-short show analyses in bracketed words, instead of separate lines
-status show just the word at success, prefixed with "*" at failure
flags:
-lang
examples:
wf Bible.txt | ma -short | wf Bible.tagged -- analyse the Bible
-- elementary generation of Strings and Trees
ps, put_string: ps String
Returns its argument String, like Unix echo.
HINT. The strength of ps comes from the possibility to receive the
argument from a pipeline, and altering it by the -filter flag.
flags:
-filter filter the result through this string processor
-length cut the string after this number of characters
examples:
gr -cat=Letter | l | ps -filter=text -- random letter as text
pt, put_tree: pt Tree
Returns its argument Tree, like a specialized Unix echo.
HINT. The strength of pt comes from the possibility to receive
the argument from a pipeline, and altering it by the -transform flag.
flags:
-transform transform the result by this term processor
-number generate this number of terms at most
examples:
p "zero is even" | pt -transform=solve -- solve ?'s in parse result
* st, show_tree: st Tree
Prints the tree as a string. Unlike pt, this command cannot be
used in a pipe to produce a tree, since its output is a string.
flags:
-printer show the tree in a special format (-printer=xml supported)
wt, wrap_tree: wt Fun
Wraps the tree as the sole argument of Fun.
flags:
-c compute the resulting new tree to normal form
vt, visualize_tree: vt Tree
Shows the abstract syntax tree via dot and gv (via temporary files
grphtmp.dot, grphtmp.ps).
flags:
-c show categories only (no functions)
-f show functions only (no categories)
-g show as graph (sharing uses of the same function)
-o just generate the .dot file
examples:
p "hello world" | vt -o | wf my.dot ;; ! open -a GraphViz my.dot
-- This writes the parse tree into my.dot and opens the .dot file
-- with another application without generating .ps.
-- subshells
es, editing_session: es
Opens an interactive editing session.
N.B. Exit from a Fudget session is to the Unix shell, not to GF.
options:
-f Fudget GUI (necessary for Unicode; only available in X Window System)
ts, translation_session: ts
Translates input lines from any of the actual languages to all other ones.
To exit, type a full stop (.) alone on a line.
N.B. Exit from a Fudget session is to the Unix shell, not to GF.
HINT: Set -parser and -lexer locally in each grammar.
options:
-f Fudget GUI (necessary for Unicode; only available in X Windows)
-lang prepend translation results with language names
flags:
-cat the parser category
examples:
ts -cat=Numeral -lang -- translate numerals, show language names
tq, translation_quiz: tq Lang Lang
Random-generates translation exercises from Lang1 to Lang2,
keeping score of success.
To interrupt, type a full stop (.) alone on a line.
HINT: Set -parser and -lexer locally in each grammar.
flags:
-cat
examples:
tq -cat=NP TestResourceEng TestResourceSwe -- quiz for NPs
tl, translation_list: tl Lang Lang
Random-generates a list of ten translation exercises from Lang1
to Lang2. The number can be changed by a flag.
HINT: use wf to save the exercises in a file.
flags:
-cat
-number
examples:
tl -cat=NP TestResourceEng TestResourceSwe -- quiz list for NPs
mq, morphology_quiz: mq
Random-generates morphological exercises,
keeping score of success.
To interrupt, type a full stop (.) alone on a line.
HINT: use printname judgements in your grammar to
produce nice expressions for desired forms.
flags:
-cat
-lang
examples:
mq -cat=N -lang=TestResourceSwe -- quiz for Swedish nouns
ml, morphology_list: ml
Random-generates a list of ten morphological exercises,
keeping score of success. The number can be changed with a flag.
HINT: use wf to save the exercises in a file.
flags:
-cat
-lang
-number
examples:
ml -cat=N -lang=TestResourceSwe -- quiz list for Swedish nouns
-- IO related commands
rf, read_file: rf File
Returns the contents of File as a String; error if File does not exist.
wf, write_file: wf File String
Writes String into File; File is created if it does not exist.
N.B. the command overwrites File without a warning.
af, append_file: af File String
Writes String into the end of File; File is created if it does not exist.
* tg, transform_grammar: tg File
Reads File, parses as a grammar,
but instead of compiling further, prints it.
The environment is not changed. When parsing the grammar, the same file
name suffixes are supported as in the i command.
HINT: use this command to print the grammar in
another format (the -printer flag); pipe it to wf to save this format.
flags:
-printer (only -printer=latex supported currently)
* cl, convert_latex: cl File
Reads File, which is expected to be in LaTeX form.
Three environments are treated in special ways:
\begGF - \end{verbatim}, which contains GF judgements,
\begTGF - \end{verbatim}, which contains a GF expression (displayed)
\begInTGF - \end{verbatim}, which contains a GF expressions (inlined).
Moreover, certain macros should be included in the file; you can
get those macros by applying 'tg -printer=latex foo.gf' to any grammar
foo.gf. Notice that the same File can be imported as a GF grammar,
consisting of all the judgements in \begGF environments.
HINT: pipe with 'wf Foo.tex' to generate a new Latex file.
sa, speak_aloud: sa String
Uses the Flite speech generator to produce speech for String.
Works for American English spelling.
examples:
h | sa -- listen to the list of commands
gr -cat=S | l | sa -- generate a random sentence and speak it aloud
si, speech_input: si
Uses an ATK speech recognizer to get speech input.
flags:
-lang: The grammar to use with the speech recognizer.
-cat: The grammar category to get input in.
-language: Use acoustic model and dictionary for this language.
-number: The number of utterances to recognize.
h, help: h Command?
Displays the paragraph concerning the command from this help file.
Without the argument, shows the first lines of all paragraphs.
options:
-all show the whole help file
-defs show user-defined commands and terms
-FLAG show the values of FLAG (works for grammar-independent flags)
examples:
h print_grammar -- show all information on the pg command
q, quit: q
Exits GF.
HINT: you can use 'ph | wf history' to save your session.
!, system_command: ! String
Issues a system command. No value is returned to GF.
example:
! ls
?, system_command: ? String
Issues a system command that receives its arguments from GF pipe
and returns a value to GF.
example:
h | ? 'wc -l' | p -cat=Num
-- Flags. The availability of flags is defined separately for each command.
-cat, category in which parsing is performed.
The default is S.
-depth, the search depth in e.g. random generation.
The default depends on application.
-filter, operation performed on a string. The default is identity.
-filter=identity no change
-filter=erase erase the text
-filter=take100 show the first 100 characters
-filter=length show the length of the string
-filter=text format as text (punctuation, capitalization)
-filter=code format as code (spacing, indentation)
-lang, grammar used when executing a grammar-dependent command.
The default is the last-imported grammar.
-language, voice used by Festival as its --language flag in the sa command.
The default is system-dependent.
-length, the maximum number of characters shown of a string.
The default is unlimited.
-lexer, tokenization transforming a string into lexical units for a parser.
The default is words.
-lexer=words tokens are separated by spaces or newlines
-lexer=literals like words, but GF integer and string literals recognized
-lexer=vars like words, but "x","x_...","$...$" as vars, "?..." as meta
-lexer=chars each character is a token
-lexer=code use Haskell's lex
-lexer=codevars like code, but treat unknown words as variables, ?? as meta
-lexer=textvars like text, but treat unknown words as variables, ?? as meta
-lexer=text with conventions on punctuation and capital letters
-lexer=codelit like code, but treat unknown words as string literals
-lexer=textlit like text, but treat unknown words as string literals
-lexer=codeC use a C-like lexer
-lexer=ignore like literals, but ignore unknown words
-lexer=subseqs like ignore, but then try all subsequences from longest
-number, the maximum number of generated items in a list.
The default is unlimited.
-optimize, optimization on generated code.
The default is share for concrete, none for resource modules.
Each of the flags can have the suffix _subs, which performs
common subexpression elimination after the main optimization.
Thus, -optimize=all_subs is the most aggressive one. The _subs
strategy only works in GFC, and applies therefore in concrete but
not in resource modules.
-optimize=share share common branches in tables
-optimize=parametrize first try parametrize then do share with the rest
-optimize=values represent tables as courses-of-values
-optimize=all first try parametrize then do values with the rest
-optimize=none no optimization
-parser, parsing strategy. The default is chart. If -cfg or -mcfg are
selected, only bottomup and topdown are recognized.
-parser=chart bottom-up chart parsing
-parser=bottomup a more up to date bottom-up strategy
-parser=topdown top-down strategy
-parser=old an old bottom-up chart parser
-printer, format in which the grammar is printed. The default is
gfc. Those marked with M are (only) available for pm, the rest
for pg.
-printer=gfc GFC grammar
-printer=gf GF grammar
-printer=old old GF grammar
-printer=cf context-free grammar, with profiles
-printer=bnf context-free grammar, without profiles
-printer=lbnf labelled context-free grammar for BNF Converter
-printer=plbnf grammar for BNF Converter, with precedence levels
*-printer=happy source file for Happy parser generator (use lbnf!)
-printer=haskell abstract syntax in Haskell, with transl to/from GF
-printer=haskell_gadt abstract syntax GADT in Haskell, with transl to/from GF
-printer=morpho full-form lexicon, long format
*-printer=latex LaTeX file (for the tg command)
-printer=fullform full-form lexicon, short format
*-printer=xml XML: DTD for the pg command, object for st
-printer=old old GF: file readable by GF 1.2
-printer=stat show some statistics of generated GFC
-printer=probs show probabilities of all functions
-printer=gsl Nuance GSL speech recognition grammar
-printer=jsgf Java Speech Grammar Format
-printer=jsgf_sisr_old Java Speech Grammar Format with semantic tags in
SISR WD 20030401 format
-printer=srgs_abnf SRGS ABNF format
-printer=srgs_abnf_non_rec SRGS ABNF format, without any recursion.
-printer=srgs_abnf_sisr_old SRGS ABNF format, with semantic tags in
SISR WD 20030401 format
-printer=srgs_xml SRGS XML format
-printer=srgs_xml_non_rec SRGS XML format, without any recursion.
-printer=srgs_xml_prob SRGS XML format, with weights
-printer=srgs_xml_sisr_old SRGS XML format, with semantic tags in
SISR WD 20030401 format
-printer=vxml Generate a dialogue system in VoiceXML.
-printer=slf a finite automaton in the HTK SLF format
-printer=slf_graphviz the same automaton as slf, but in Graphviz format
-printer=slf_sub a finite automaton with sub-automata in the
HTK SLF format
-printer=slf_sub_graphviz the same automaton as slf_sub, but in
Graphviz format
-printer=fa_graphviz a finite automaton with labelled edges
-printer=regular a regular grammar in a simple BNF
-printer=unpar a gfc grammar with parameters eliminated
-printer=functiongraph abstract syntax functions in 'dot' format
-printer=typegraph abstract syntax categories in 'dot' format
-printer=transfer Transfer language datatype (.tr file format)
-printer=cfg-prolog M cfg in prolog format (also pg)
-printer=gfc-prolog M gfc in prolog format (also pg)
-printer=gfcm M gfcm file (default for pm)
-printer=graph M module dependency graph in 'dot' (graphviz) format
-printer=header M gfcm file with header (for GF embedded in Java)
-printer=js M JavaScript type annotator and linearizer
-printer=mcfg-prolog M mcfg in prolog format (also pg)
-printer=missing M the missing linearizations of each concrete
-startcat, like -cat, but used in grammars (to avoid clash with keyword cat)
-transform, transformation performed on a syntax tree. The default is identity.
-transform=identity no change
-transform=compute compute by using definitions in the grammar
-transform=nodup return the term only if it has no constants duplicated
-transform=nodupatom return the term only if it has no atomic constants duplicated
-transform=typecheck return the term only if it is type-correct
-transform=solve solve metavariables as derived refinements
-transform=context solve metavariables by unique refinements as variables
-transform=delete replace the term by metavariable
-unlexer, untokenization transforming linearization output into a string.
The default is unwords.
-unlexer=unwords space-separated token list (like unwords)
-unlexer=text format as text: punctuation, capitals, paragraph <p>
-unlexer=code format as code (spacing, indentation)
-unlexer=textlit like text, but remove string literal quotes
-unlexer=codelit like code, but remove string literal quotes
-unlexer=concat remove all spaces
-unlexer=bind like identity, but bind at "&+"
-mark, marking of parts of tree in linearization. The default is none.
-mark=metacat append "+CAT" to every metavariable, showing its category
-mark=struct show tree structure with brackets
-mark=java show tree structure with XML tags (used in gfeditor)
-coding, Some grammars are in UTF-8, some in isolatin-1.
If the letters ä (a-umlaut) and ö (o-umlaut) look strange, either
change your terminal to isolatin-1, or rewrite the grammar with
'pg -utf8'.
-- *: Commands and options marked with * are not currently implemented.

View File

@@ -1,250 +0,0 @@
# Build configuration produced by ./configure.
include config.mk
# GHC in --make mode chases module dependencies itself.
GHMAKE=$(GHC) --make
GHCXMAKE=ghcxmake
GHCFLAGS+= -fglasgow-exts
GHCOPTFLAGS=-O2
GHCFUDFLAG=
# Name of the source distribution directory/tarball.
DIST_DIR=GF-$(PACKAGE_VERSION)
# Files and directories pruned from the source distribution.
NOT_IN_DIST= \
grammars \
download \
doc/release2.html \
src/tools/AlphaConvGF.hs
BIN_DIST_DIR=$(DIST_DIR)-$(host)
# Grammar snapshots are versioned by date (YYYYMMDD).
GRAMMAR_PACKAGE_VERSION=$(shell date +%Y%m%d)
GRAMMAR_DIST_DIR=gf-grammars-$(GRAMMAR_PACKAGE_VERSION)
# Windows installer name: dots in the version replaced by underscores.
MSI_FILE=gf-$(subst .,_,$(PACKAGE_VERSION)).msi
# Installation layout for data files and compiled grammar libraries.
GF_DATA_DIR=$(datadir)/GF-$(PACKAGE_VERSION)
GF_LIB_DIR=$(GF_DATA_DIR)/lib
# Template application for GF embedded in other programs.
EMBED = GF/Embed/TemplateApp
# use the temporary binary file name 'gf-bin' to not clash with directory 'GF'
# on case insensitive file systems (such as FAT)
GF_EXE=gf$(EXEEXT)
GF_EXE_TMP=gf-bin$(EXEEXT)
GF_DOC_EXE=gfdoc$(EXEEXT)
# Optional features selected by configure: readline support, extra
# preprocessor/linker flags, interrupt handling, ATK accessibility.
ifeq ("$(READLINE)","readline")
GHCFLAGS += -package readline -DUSE_READLINE
endif
ifneq ("$(CPPFLAGS)","")
GHCFLAGS += $(addprefix -optP, $(CPPFLAGS))
endif
ifneq ("$(LDFLAGS)","")
GHCFLAGS += $(addprefix -optl, $(LDFLAGS))
endif
ifeq ("$(INTERRUPT)","yes")
GHCFLAGS += -DUSE_INTERRUPT
endif
ifeq ("$(ATK)","yes")
GHCFLAGS += -DUSE_ATK
endif
# Build the Java bridge only when Java support was enabled.
ifeq ("$(ENABLE_JAVA)", "yes")
BUILD_JAR=jar
else
BUILD_JAR=
endif
# Targets that do not produce a file of their own name.
.PHONY: all unix jar tags gfdoc windows install install-gf \
lib temp install-gfdoc \
today help clean windows-msi dist gfc
# Default: optimized GF binary, the gfc wrapper, and the resource library.
all: unix gfc lib
# Statically linked build (passes -static through GHC to the linker).
static: GHCFLAGS += -optl-static
static: unix
gf: unix
# 'unix' regenerates the version stamp, then builds with optimization.
unix: today opt
windows: unix
# 'temp' is a quick unoptimized build for development.
temp: today noopt
# Compile to a temporary name, strip, then move into ../bin
# (see the note above GF_EXE about case-insensitive file systems).
build:
$(GHMAKE) $(GHCFLAGS) GF.hs -o $(GF_EXE_TMP)
strip $(GF_EXE_TMP)
mv $(GF_EXE_TMP) ../bin/$(GF_EXE)
# Optimized build: adds -O2 via a target-specific variable.
opt: GHCFLAGS += $(GHCOPTFLAGS)
opt: build
# Build the embedded-GF template application in place.
embed: GHCFLAGS += $(GHCOPTFLAGS)
embed:
$(GHMAKE) $(GHCFLAGS) $(EMBED) -o $(EMBED)
strip $(EMBED)
noopt: build
# Remove build products; a leading '-' makes make ignore failures
# (e.g. files that do not exist).
clean:
find . '(' -name '*~' -o -name '*.hi' -o -name '*.ghi' -o -name '*.o' ')' -exec rm -f '{}' ';'
-rm -f gf.wixobj
-rm -f ../bin/$(GF_EXE)
$(MAKE) -C tools/c clean
$(MAKE) -C ../lib/c clean
-rm -f ../bin/gfcc2c
# Additionally remove configure output and distribution archives.
distclean: clean
-rm -f tools/$(GF_DOC_EXE)
-rm -f config.status config.mk config.log
-rm -f *.tgz *.zip
-rm -rf $(DIST_DIR) $(BIN_DIST_DIR)
-rm -rf gf.wxs *.msi
# Regenerate Paths_gf.hs with the data directory baked in.
# NOTE(review): the version is hard-coded as 3.0 "beta3" rather than
# derived from $(PACKAGE_VERSION) -- confirm this is intentional.
today:
echo 'module Paths_gf (version, getDataDir) where' > Paths_gf.hs
echo 'import Data.Version' >> Paths_gf.hs
echo '{-# NOINLINE version #-}' >> Paths_gf.hs
echo 'version :: Version' >> Paths_gf.hs
echo 'version = Version {versionBranch = [3,0], versionTags = ["beta3"]}' >> Paths_gf.hs
echo 'getDataDir = return "$(GF_DATA_DIR)" :: IO FilePath' >> Paths_gf.hs
# Debug aid: print the GHC flags currently in effect.
showflags:
@echo $(GHCFLAGS)
# added by peb:
# Development builds with the -DTRACING debug hooks enabled.
tracing: GHCFLAGS += -DTRACING
tracing: temp
ghci-trace: GHCFLAGS += -DTRACING
ghci-trace: ghci
#touch-files:
# rm -f GF/System/Tracing.{hi,o}
# touch GF/System/Tracing.hs
# profiling
# Optimized build with GHC profiling instrumentation for all bindings.
prof: GHCOPTFLAGS += -prof -auto-all
prof: unix
# Generate an editor tags file over all Haskell sources.
tags:
find GF Transfer -name '*.hs' | xargs hasktags
#
# Help file
#
# Build the generator, then regenerate GF/Shell/HelpFile.hs from HelpFile.
tools/MkHelpFile: tools/MkHelpFile.hs
$(GHMAKE) -o $@ $^
help: GF/Shell/HelpFile.hs
GF/Shell/HelpFile.hs: tools/MkHelpFile HelpFile
tools/MkHelpFile
#
# Tools
#
# The gfdoc documentation tool.
gfdoc: tools/$(GF_DOC_EXE)
tools/$(GF_DOC_EXE): tools/GFDoc.hs
$(GHMAKE) $(GHCOPTFLAGS) -o $@ $^
# Install the gfc wrapper script next to the gf binary.
gfc: gf
echo GFC!
cp -f gfc ../bin/
chmod a+x ../bin/gfc
# Build the gfcc-to-C translator and move it into ../bin.
gfcc2c:
$(MAKE) -C tools/c
$(MAKE) -C ../lib/c
mv tools/c/gfcc2c ../bin
#
# Resource grammars
#
# Rebuild the resource grammar library from scratch.
lib:
$(MAKE) -C ../lib/resource clean all
#
# Distribution
#
# Source distribution: export the darcs repo, stamp $(PACKAGE_VERSION)
# into configure.ac, regenerate configure, prune $(NOT_IN_DIST), and tar.
dist:
-rm -rf $(DIST_DIR)
darcs dist --dist-name=$(DIST_DIR)
tar -zxf ../$(DIST_DIR).tar.gz
rm ../$(DIST_DIR).tar.gz
cd $(DIST_DIR)/src && perl -pi -e "s/^AC_INIT\(\[GF\],\[[^\]]*\]/AC_INIT([GF],[$(PACKAGE_VERSION)]/" configure.ac
cd $(DIST_DIR)/src && autoconf && rm -rf autom4te.cache
# cd $(DIST_DIR)/grammars && sh mkLib.sh
cd $(DIST_DIR) && rm -rf $(NOT_IN_DIST)
$(TAR) -zcf $(DIST_DIR).tgz $(DIST_DIR)
rm -rf $(DIST_DIR)
# Date-stamped source snapshot (overrides the version per-target).
snapshot: PACKAGE_VERSION=$(shell date +%Y%m%d)
snapshot: DIST_DIR=GF-$(PACKAGE_VERSION)
snapshot: dist
# Build an RPM package from the source tarball's embedded spec.
rpm: dist
rpmbuild -ta $(DIST_DIR).tgz
# Binary distribution for the configured host triple: rebuild, then
# collect binaries, configure machinery and docs into one tarball.
binary-dist:
rm -rf $(BIN_DIST_DIR)
mkdir $(BIN_DIST_DIR)
mkdir $(BIN_DIST_DIR)/lib
./configure --host="$(host)" --build="$(build)"
$(MAKE) gfc gfdoc
$(INSTALL) ../bin/$(GF_EXE) tools/$(GF_DOC_EXE) $(BIN_DIST_DIR)
$(INSTALL) configure config.guess config.sub install-sh config.mk.in $(BIN_DIST_DIR)
$(INSTALL) gfc.in $(BIN_DIST_DIR)
$(INSTALL) -m 0644 ../README ../LICENSE $(BIN_DIST_DIR)
$(INSTALL) -m 0644 INSTALL.binary $(BIN_DIST_DIR)/INSTALL
$(INSTALL) -m 0644 Makefile.binary $(BIN_DIST_DIR)/Makefile
# $(TAR) -C $(BIN_DIST_DIR)/lib -zxf ../lib/compiled.tgz
$(TAR) -zcf GF-$(PACKAGE_VERSION)-$(host).tgz $(BIN_DIST_DIR)
rm -rf $(BIN_DIST_DIR)
# Tarball of the grammar libraries, with selected presentations precompiled.
grammar-dist:
-rm -rf $(GRAMMAR_DIST_DIR)
mkdir $(GRAMMAR_DIST_DIR)
cp -r ../_darcs/current/{lib,examples} $(GRAMMAR_DIST_DIR)
$(MAKE) GF_LIB_PATH=.. -C $(GRAMMAR_DIST_DIR)/lib/resource-1.0 show-path prelude present alltenses mathematical api multimodal langs
$(TAR) -zcf $(GRAMMAR_DIST_DIR).tgz $(GRAMMAR_DIST_DIR)
rm -rf $(GRAMMAR_DIST_DIR)
# Windows .msi installer via the WiX toolset (candle + light).
gf.wxs: config.status gf.wxs.in
./config.status --file=$@
windows-msi: gf.wxs
candle -nologo gf.wxs
light -nologo -o $(MSI_FILE) gf.wixobj
#
# Installation
#
install: install-gf install-gfdoc install-lib
# Install the gf binary into $(bindir).
install-gf:
$(INSTALL) -d $(bindir)
$(INSTALL) ../bin/$(GF_EXE) $(bindir)
# Install the gfdoc documentation tool.
install-gfdoc:
$(INSTALL) -d $(bindir)
$(INSTALL) tools/$(GF_DOC_EXE) $(bindir)
# Unpack the precompiled grammar libraries into the data directory.
install-lib:
$(INSTALL) -d $(GF_LIB_DIR)
$(TAR) -C $(GF_LIB_DIR) -zxf ../lib/compiled.tgz

View File

@@ -1,20 +0,0 @@
# Install/uninstall rules for the GF binary distribution.
include config.mk
# Installation layout: data directory holding the grammar library tree.
GF_DATA_DIR=$(datadir)/GF-$(PACKAGE_VERSION)
GF_LIB_DIR=$(GF_DATA_DIR)/lib
.PHONY: install uninstall
# Install the prebuilt binaries and copy the grammar libraries.
install:
$(INSTALL) -d $(bindir)
$(INSTALL) gf$(EXEEXT) gfdoc$(EXEEXT) $(bindir)
$(INSTALL) gfc$(EXEEXT) $(bindir)
$(INSTALL) -d $(GF_DATA_DIR)
cp -r lib $(GF_DATA_DIR)
# Remove everything 'install' created. Leading '-' ignores failures so a
# partially installed tree can still be cleaned up.
# Fixes: '$GF_LIB_DIR)' was missing its '(' (expanded to nothing), and
# '*.gf{o}' is not brace-expanded by /bin/sh, so compiled .gfo files
# were never deleted; also remove gfc, which 'install' puts in $(bindir).
uninstall:
-rm -f $(bindir)/gf$(EXEEXT) $(bindir)/gfdoc$(EXEEXT)
-rm -f $(bindir)/gfc$(EXEEXT)
-rm -f $(GF_LIB_DIR)/*/*.gfo
-rmdir $(GF_LIB_DIR)/*
-rmdir $(GF_LIB_DIR)
-rmdir $(GF_DATA_DIR)

View File

@@ -1,13 +0,0 @@
-- English concrete syntax of the abstract grammar Ex.
-- NP carries an inherent number feature on which VP's table is
-- selected, giving subject-verb agreement.
concrete Eng of Ex = {
lincat
-- sentences are plain strings
S = {s : Str} ;
-- noun phrase: a string plus its inherent number
NP = {s : Str ; n : Num} ;
-- verb phrase: a string depending on the subject's number
VP = {s : Num => Str} ;
param
Num = Sg | Pl ;
lin
-- predication: pick the verb form agreeing with the subject's number
Pred np vp = {s = np.s ++ vp.s ! np.n} ;
She = {s = "she" ; n = Sg} ;
They = {s = "they" ; n = Pl} ;
Sleep = {s = table {Sg => "sleeps" ; Pl => "sleep"}} ;
}

View File

@@ -1,8 +0,0 @@
-- Abstract syntax of a toy grammar: sentences built by predication
-- of a verb phrase over a noun phrase.
abstract Ex = {
cat
S ; NP ; VP ;
fun
-- combine a subject and a verb phrase into a sentence
Pred : NP -> VP -> S ;
She, They : NP ;
Sleep : VP ;
}

View File

@@ -1,13 +0,0 @@
-- Swedish concrete syntax of Ex. Present-tense verb forms here do not
-- vary with the subject, so all lincats are plain strings and Pred is
-- simple concatenation.
concrete Swe of Ex = {
lincat
S = {s : Str} ;
NP = {s : Str} ;
VP = {s : Str} ;
param
-- NOTE(review): Num is declared but never used in this module
Num = Sg | Pl ;
lin
Pred np vp = {s = np.s ++ vp.s} ;
She = {s = "hon"} ;
They = {s = "de"} ;
Sleep = {s = "sover"} ;
}

View File

@@ -1,64 +0,0 @@
-- to test GFCC compilation
-- Toy Finnish fragment: nouns inflect for number and case, verbs for
-- number and person; NPs carry agreement features used in predication.
flags coding=utf8 ;
cat S ; NP ; N ; VP ;
-- intransitive and transitive predication
fun Pred : NP -> VP -> S ;
fun Pred2 : NP -> VP -> NP -> S ;
-- singular and plural determination of a noun
fun Det, Dets : N -> NP ;
-- pronouns: I, you(sg), we, you(pl)
fun Mina, Sina, Me, Te : NP ;
fun Raha, Paska, Pallo : N ;
fun Puhua, Munia, Sanoa : VP ;
param Person = P1 | P2 | P3 ;
param Number = Sg | Pl ;
param Case = Nom | Part ;
param NForm = NF Number Case ;
param VForm = VF Number Person ;
lincat N = Noun ;
lincat VP = Verb ;
oper Noun = {s : NForm => Str} ;
oper Verb = {s : VForm => Str} ;
-- NP: string per case, plus agreement features for the verb
lincat NP = {s : Case => Str ; a : {n : Number ; p : Person}} ;
-- subject in nominative, verb agreeing with the subject's features
lin Pred np vp = {s = np.s ! Nom ++ vp.s ! VF np.a.n np.a.p} ;
-- as Pred, with the object in the partitive
lin Pred2 np vp ob = {s = np.s ! Nom ++ vp.s ! VF np.a.n np.a.p ++ ob.s ! Part} ;
lin Det no = {s = \\c => no.s ! NF Sg c ; a = {n = Sg ; p = P3}} ;
lin Dets no = {s = \\c => no.s ! NF Pl c ; a = {n = Pl ; p = P3}} ;
-- pronoun tables list the Nom and Part forms in the order of Case
lin Mina = {s = table Case ["minä" ; "minua"] ; a = {n = Sg ; p = P1}} ;
lin Te = {s = table Case ["te" ; "teitä"] ; a = {n = Pl ; p = P2}} ;
lin Sina = {s = table Case ["sinä" ; "sinua"] ; a = {n = Sg ; p = P2}} ;
lin Me = {s = table Case ["me" ; "meitä"] ; a = {n = Pl ; p = P1}} ;
lin Raha = mkN "raha" ;
lin Paska = mkN "paska" ;
lin Pallo = mkN "pallo" ;
lin Puhua = mkV "puhu" ;
lin Munia = mkV "muni" ;
lin Sanoa = mkV "sano" ;
-- regular noun paradigm from the singular nominative stem;
-- Predef.tk 1 drops the final stem character before the Pl Part ending
oper mkN : Str -> Noun = \raha -> {
s = table {
NF Sg Nom => raha ;
NF Sg Part => raha + "a" ;
NF Pl Nom => raha + "t" ;
NF Pl Part => Predef.tk 1 raha + "oja"
}
} ;
-- regular verb paradigm from the stem; Sg P3 doubles the final stem
-- character via Predef.dp 1 (vowel lengthening)
oper mkV : Str -> Verb = \puhu -> {
s = table {
VF Sg P1 => puhu + "n" ;
VF Sg P2 => puhu + "t" ;
VF Sg P3 => puhu + Predef.dp 1 puhu ;
VF Pl P1 => puhu + "mme" ;
VF Pl P2 => puhu + "tte" ;
VF Pl P3 => puhu + "vat"
}
} ;

View File

@@ -1,809 +0,0 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
<HTML>
<HEAD>
<META NAME="generator" CONTENT="http://txt2tags.sf.net">
<TITLE>The GFCC Grammar Format</TITLE>
</HEAD><BODY BGCOLOR="white" TEXT="black">
<P ALIGN="center"><CENTER><H1>The GFCC Grammar Format</H1>
<FONT SIZE="4">
<I>Aarne Ranta</I><BR>
October 5, 2007
</FONT></CENTER>
<P>
Author's address:
<A HREF="http://www.cs.chalmers.se/~aarne"><CODE>http://www.cs.chalmers.se/~aarne</CODE></A>
</P>
<P>
History:
</P>
<UL>
<LI>5 Oct 2007: new, better structured GFCC with full expressive power
<LI>19 Oct: translation of lincats, new figures on C++
<LI>3 Oct 2006: first version
</UL>
<H2>What is GFCC</H2>
<P>
GFCC is a low-level format for GF grammars. Its aim is to contain the minimum
that is needed to process GF grammars at runtime. This minimality has three
advantages:
</P>
<UL>
<LI>compact grammar files and run-time objects
<LI>time and space efficient processing
<LI>simple definition of interpreters
</UL>
<P>
Thus we also want to call GFCC the <B>portable grammar format</B>.
</P>
<P>
The idea is that all embedded GF applications use GFCC.
The GF system would be primarily used as a compiler and as a grammar
development tool.
</P>
<P>
Since GFCC is implemented in BNFC, a parser of the format is readily
available for C, C++, C#, Haskell, Java, and OCaml. Also an XML
representation can be generated in BNFC. A
<A HREF="../">reference implementation</A>
of linearization and some other functions has been written in Haskell.
</P>
<H2>GFCC vs. GFC</H2>
<P>
GFCC is aimed to replace GFC as the run-time grammar format. GFC was designed
to be a run-time format, but also to
support separate compilation of grammars, i.e.
to store the results of compiling
individual GF modules. But this means that GFC has to contain extra information,
such as type annotations, which is only needed in compilation and not at
run-time. In particular, the pattern matching syntax and semantics of GFC is
complex and therefore difficult to implement in new platforms.
</P>
<P>
Actually, GFC is planned to be omitted also as the target format of
separate compilation, where plain GF (type annotated and partially evaluated)
will be used instead. GFC provides only marginal advantages as a target format
compared with GF, and it is therefore just extra weight to carry around this
format.
</P>
<P>
The main differences of GFCC compared with GFC (and GF) can be summarized as follows:
</P>
<UL>
<LI>there are no modules, and therefore no qualified names
<LI>a GFCC grammar is multilingual, and consists of a common abstract syntax
together with one concrete syntax per language
<LI>records and tables are replaced by arrays
<LI>record labels and parameter values are replaced by integers
<LI>record projection and table selection are replaced by array indexing
<LI>even though the format does support dependent types and higher-order abstract
syntax, there is no interpreter yet that does this
</UL>
<P>
Here is an example of a GF grammar, consisting of three modules,
as translated to GFCC. The representations are aligned; thus they do not completely
reflect the order of judgements in GFCC files, which have different orders of
blocks of judgements, and alphabetical sorting.
</P>
<PRE>
grammar Ex(Eng,Swe);
abstract Ex = { abstract {
cat cat
S ; NP ; VP ; NP[]; S[]; VP[];
fun fun
Pred : NP -&gt; VP -&gt; S ; Pred=[(($ 0! 1),(($ 1! 0)!($ 0! 0)))];
She, They : NP ; She=[0,"she"];
Sleep : VP ; They=[1,"they"];
Sleep=[["sleeps","sleep"]];
} } ;
concrete Eng of Ex = { concrete Eng {
lincat lincat
S = {s : Str} ; S=[()];
NP = {s : Str ; n : Num} ; NP=[1,()];
VP = {s : Num =&gt; Str} ; VP=[[(),()]];
param
Num = Sg | Pl ;
lin lin
Pred np vp = { Pred=[(($ 0! 1),(($ 1! 0)!($ 0! 0)))];
s = np.s ++ vp.s ! np.n} ;
She = {s = "she" ; n = Sg} ; She=[0,"she"];
They = {s = "they" ; n = Pl} ; They = [1, "they"];
Sleep = {s = table { Sleep=[["sleeps","sleep"]];
Sg =&gt; "sleeps" ;
Pl =&gt; "sleep"
}
} ;
} } ;
concrete Swe of Ex = { concrete Swe {
lincat lincat
S = {s : Str} ; S=[()];
NP = {s : Str} ; NP=[()];
VP = {s : Str} ; VP=[()];
param
Num = Sg | Pl ;
lin lin
Pred np vp = { Pred = [(($0!0),($1!0))];
s = np.s ++ vp.s} ;
She = {s = "hon"} ; She = ["hon"];
They = {s = "de"} ; They = ["de"];
Sleep = {s = "sover"} ; Sleep = ["sover"];
} } ;
</PRE>
<P></P>
<H2>The syntax of GFCC files</H2>
<P>
The complete BNFC grammar, from which
the rules in this section are taken, is in the file
<A HREF="../DataGFCC.cf"><CODE>GF/GFCC/GFCC.cf</CODE></A>.
</P>
<H3>Top level</H3>
<P>
A grammar has a header telling the name of the abstract syntax
(often specifying an application domain), and the names of
the concrete languages. The abstract syntax and the concrete
syntaxes themselves follow.
</P>
<PRE>
Grm. Grammar ::=
"grammar" CId "(" [CId] ")" ";"
Abstract ";"
[Concrete] ;
Abs. Abstract ::=
"abstract" "{"
"flags" [Flag]
"fun" [FunDef]
"cat" [CatDef]
"}" ;
Cnc. Concrete ::=
"concrete" CId "{"
"flags" [Flag]
"lin" [LinDef]
"oper" [LinDef]
"lincat" [LinDef]
"lindef" [LinDef]
"printname" [LinDef]
"}" ;
</PRE>
<P>
This syntax organizes each module to a sequence of <B>fields</B>, such
as flags, linearizations, operations, linearization types, etc.
It is envisaged that particular applications can ignore some
of the fields, typically so that earlier fields are more
important than later ones.
</P>
<P>
The judgement forms have the following syntax.
</P>
<PRE>
Flg. Flag ::= CId "=" String ;
Cat. CatDef ::= CId "[" [Hypo] "]" ;
Fun. FunDef ::= CId ":" Type "=" Exp ;
Lin. LinDef ::= CId "=" Term ;
</PRE>
<P>
For the run-time system, the reference implementation in Haskell
uses a structure that gives efficient look-up:
</P>
<PRE>
data GFCC = GFCC {
absname :: CId ,
cncnames :: [CId] ,
abstract :: Abstr ,
concretes :: Map CId Concr
}
data Abstr = Abstr {
aflags :: Map CId String, -- value of a flag
funs :: Map CId (Type,Exp), -- type and def of a fun
cats :: Map CId [Hypo], -- context of a cat
catfuns :: Map CId [CId] -- funs yielding a cat (redundant, for fast lookup)
}
data Concr = Concr {
flags :: Map CId String, -- value of a flag
lins :: Map CId Term, -- lin of a fun
opers :: Map CId Term, -- oper generated by subex elim
lincats :: Map CId Term, -- lin type of a cat
lindefs :: Map CId Term, -- lin default of a cat
printnames :: Map CId Term -- printname of a cat or a fun
}
</PRE>
<P>
These definitions are from <A HREF="../DataGFCC.hs"><CODE>GF/GFCC/DataGFCC.hs</CODE></A>.
</P>
<P>
Identifiers (<CODE>CId</CODE>) are like <CODE>Ident</CODE> in GF, except that
the compiler produces constants prefixed with <CODE>_</CODE> in
the common subterm elimination optimization.
</P>
<PRE>
token CId (('_' | letter) (letter | digit | '\'' | '_')*) ;
</PRE>
<P></P>
<H3>Abstract syntax</H3>
<P>
Types are first-order function types built from argument type
contexts and value category symbols. Syntax trees (<CODE>Exp</CODE>) are
rose trees with nodes consisting of a head (<CODE>Atom</CODE>) and
bound variables (<CODE>CId</CODE>).
</P>
<PRE>
DTyp. Type ::= "[" [Hypo] "]" CId [Exp] ;
DTr. Exp ::= "[" "(" [CId] ")" Atom [Exp] "]" ;
Hyp. Hypo ::= CId ":" Type ;
</PRE>
<P>
The head Atom is either a function
constant, a bound variable, or a metavariable, or a string, integer, or float
literal.
</P>
<PRE>
AC. Atom ::= CId ;
AS. Atom ::= String ;
AI. Atom ::= Integer ;
AF. Atom ::= Double ;
AM. Atom ::= "?" Integer ;
</PRE>
<P>
The context-free types and trees of the "old GFCC" are special
cases, which can be defined as follows:
</P>
<PRE>
Typ. Type ::= [CId] "-&gt;" CId
Typ args val = DTyp [Hyp (CId "_") arg | arg &lt;- args] val
Tr. Exp ::= "(" CId [Exp] ")"
Tr fun exps = DTr [] fun exps
</PRE>
<P>
To store semantic (<CODE>def</CODE>) definitions by cases, the following expression
form is provided, but it is only meaningful in the last field of a function
declaration in an abstract syntax:
</P>
<PRE>
EEq. Exp ::= "{" [Equation] "}" ;
Equ. Equation ::= [Exp] "-&gt;" Exp ;
</PRE>
<P>
Notice that expressions are used to encode patterns. Primitive notions
(the default semantics in GF) are encoded as empty sets of equations
(<CODE>[]</CODE>). For a constructor (canonical form) of a category <CODE>C</CODE>, we
aim to use the encoding as the application <CODE>(_constr C)</CODE>.
</P>
<H3>Concrete syntax</H3>
<P>
Linearization terms (<CODE>Term</CODE>) are built as follows.
Constructor names are shown to make the later code
examples readable.
</P>
<PRE>
R. Term ::= "[" [Term] "]" ; -- array (record/table)
P. Term ::= "(" Term "!" Term ")" ; -- access to field (projection/selection)
S. Term ::= "(" [Term] ")" ; -- concatenated sequence
K. Term ::= Tokn ; -- token
V. Term ::= "$" Integer ; -- argument (subtree)
C. Term ::= Integer ; -- array index (label/parameter value)
FV. Term ::= "[|" [Term] "|]" ; -- free variation
TM. Term ::= "?" ; -- linearization of metavariable
</PRE>
<P>
Tokens are strings or (maybe obsolescent) prefix-dependent
variant lists.
</P>
<PRE>
KS. Tokn ::= String ;
KP. Tokn ::= "[" "pre" [String] "[" [Variant] "]" "]" ;
Var. Variant ::= [String] "/" [String] ;
</PRE>
<P>
Two special forms of terms are introduced by the compiler
as optimizations. They can in principle be eliminated, but
their presence makes grammars much more compact. Their semantics
will be explained in a later section.
</P>
<PRE>
F. Term ::= CId ; -- global constant
W. Term ::= "(" String "+" Term ")" ; -- prefix + suffix table
</PRE>
<P>
There is also a deprecated form of "record parameter alias",
</P>
<PRE>
RP. Term ::= "(" Term "@" Term ")"; -- DEPRECATED
</PRE>
<P>
which will be removed when the migration to new GFCC is complete.
</P>
<H2>The semantics of concrete syntax terms</H2>
<P>
The code in this section is from <A HREF="../Linearize.hs"><CODE>GF/GFCC/Linearize.hs</CODE></A>.
</P>
<H3>Linearization and realization</H3>
<P>
The linearization algorithm is essentially the same as in
GFC: a tree is linearized by evaluating its linearization term
in the environment of the linearizations of the subtrees.
Literal atoms are linearized in the obvious way.
The function also needs to know the language (i.e. concrete syntax)
in which linearization is performed.
</P>
<PRE>
linExp :: GFCC -&gt; CId -&gt; Exp -&gt; Term
linExp gfcc lang tree@(DTr _ at trees) = case at of
AC fun -&gt; comp (Prelude.map lin trees) $ look fun
AS s -&gt; R [kks (show s)] -- quoted
AI i -&gt; R [kks (show i)]
AF d -&gt; R [kks (show d)]
AM _ -&gt; TM
where
lin = linExp gfcc lang
comp = compute gfcc lang
look = lookLin gfcc lang
</PRE>
<P>
TODO: bindings must be supported.
</P>
<P>
The result of linearization is usually a record, which is realized as
a string using the following algorithm.
</P>
<PRE>
realize :: Term -&gt; String
realize trm = case trm of
R (t:_) -&gt; realize t
S ss -&gt; unwords $ Prelude.map realize ss
K (KS s) -&gt; s
K (KP s _) -&gt; unwords s ---- prefix choice TODO
W s t -&gt; s ++ realize t
FV (t:_) -&gt; realize t
TM -&gt; "?"
</PRE>
<P>
Notice that realization always picks the first field of a record.
If a linearization type has more than one field, the first field
does not necessarily contain the desired string.
Also notice that the order of record fields in GFCC is not necessarily
the same as in GF source.
</P>
<H3>Term evaluation</H3>
<P>
Evaluation follows call-by-value order, with two environments
needed:
</P>
<UL>
<LI>the grammar (a concrete syntax) to give the global constants
<LI>an array of terms to give the subtree linearizations
</UL>
<P>
The code is presented in one-level pattern matching, to
enable reimplementations in languages that do not permit
deep patterns (such as Java and C++).
</P>
<PRE>
compute :: GFCC -&gt; CId -&gt; [Term] -&gt; Term -&gt; Term
compute gfcc lang args = comp where
comp trm = case trm of
P r p -&gt; proj (comp r) (comp p)
W s t -&gt; W s (comp t)
R ts -&gt; R $ Prelude.map comp ts
V i -&gt; idx args (fromInteger i) -- already computed
F c -&gt; comp $ look c -- not computed (if contains V)
FV ts -&gt; FV $ Prelude.map comp ts
S ts -&gt; S $ Prelude.filter (/= S []) $ Prelude.map comp ts
_ -&gt; trm
look = lookOper gfcc lang
idx xs i = xs !! i
proj r p = case (r,p) of
(_, FV ts) -&gt; FV $ Prelude.map (proj r) ts
(W s t, _) -&gt; kks (s ++ getString (proj t p))
_ -&gt; comp $ getField r (getIndex p)
getString t = case t of
K (KS s) -&gt; s
_ -&gt; trace ("ERROR in grammar compiler: string from "++ show t) "ERR"
getIndex t = case t of
C i -&gt; fromInteger i
RP p _ -&gt; getIndex p
TM -&gt; 0 -- default value for parameter
_ -&gt; trace ("ERROR in grammar compiler: index from " ++ show t) 0
getField t i = case t of
R rs -&gt; idx rs i
RP _ r -&gt; getField r i
TM -&gt; TM
_ -&gt; trace ("ERROR in grammar compiler: field from " ++ show t) t
</PRE>
<P></P>
<H3>The special term constructors</H3>
<P>
The three forms introduced by the compiler may need a special
explanation.
</P>
<P>
Global constants
</P>
<PRE>
Term ::= CId ;
</PRE>
<P>
are shorthands for complex terms. They are produced by the
compiler by (iterated) <B>common subexpression elimination</B>.
They are often more powerful than hand-devised code sharing in the source
code. They could be computed off-line by replacing each identifier by
its definition.
</P>
<P>
<B>Prefix-suffix tables</B>
</P>
<PRE>
Term ::= "(" String "+" Term ")" ;
</PRE>
<P>
represent tables of word forms divided to the longest common prefix
and its array of suffixes. In the example grammar above, we have
</P>
<PRE>
Sleep = [("sleep" + ["s",""])]
</PRE>
<P>
which in fact is equal to the array of full forms
</P>
<PRE>
["sleeps", "sleep"]
</PRE>
<P>
The power of this construction comes from the fact that suffix sets
tend to be repeated in a language, and can therefore be collected
by common subexpression elimination. It is this technique that
explains the used syntax rather than the more accurate
</P>
<PRE>
"(" String "+" [String] ")"
</PRE>
<P>
since we want the suffix part to be a <CODE>Term</CODE> for the optimization to
take effect.
</P>
<H2>Compiling to GFCC</H2>
<P>
Compilation to GFCC is performed by the GF grammar compiler, and
GFCC interpreters need not know what it does. For grammar writers,
however, it might be interesting to know what happens to the grammars
in the process.
</P>
<P>
The compilation phases are the following
</P>
<OL>
<LI>type check and partially evaluate GF source
<LI>create a symbol table mapping the GF parameter and record types to
fixed-size arrays, and parameter values and record labels to integers
<LI>traverse the linearization rules replacing parameters and labels by integers
<LI>reorganize the created GF grammar so that it has just one abstract syntax
and one concrete syntax per language
<LI>TODO: apply UTF8 encoding to the grammar, if not yet applied (this is told by the
<CODE>coding</CODE> flag)
<LI>translate the GF grammar object to a GFCC grammar object, using a simple
compositional mapping
<LI>perform the word-suffix optimization on GFCC linearization terms
<LI>perform subexpression elimination on each concrete syntax module
<LI>print out the GFCC code
</OL>
<H3>Problems in GFCC compilation</H3>
<P>
Two major problems had to be solved in compiling GF to GFCC:
</P>
<UL>
<LI>consistent order of tables and records, to permit the array translation
<LI>run-time variables in complex parameter values.
</UL>
<P>
The current implementation is still experimental and may fail
to generate correct code. Any errors remaining are likely to be
related to the two problems just mentioned.
</P>
<P>
The order problem is solved in slightly different ways for tables and records.
In both cases, <B>eta expansion</B> is used to establish a
canonical order. Tables are ordered by applying the preorder induced
by <CODE>param</CODE> definitions. Records are ordered by sorting them by labels.
This means that
e.g. the <CODE>s</CODE> field will in general no longer appear as the first
field, even if it does so in the GF source code. But relying on the
order of fields in a labelled record would be misplaced anyway.
</P>
<P>
The canonical form of records is further complicated by lock fields,
i.e. dummy fields of form <CODE>lock_C = &lt;&gt;</CODE>, which are added to grammar
libraries to force intensionality of linearization types. The problem
is that the absence of a lock field only generates a warning, not
an error. Therefore a GF grammar can contain objects of the same
type with and without a lock field. This problem was solved in GFCC
generation by just removing all lock fields (defined as fields whose
type is the empty record type). This has the further advantage of
(slightly) reducing the grammar size. More importantly, it is safe
to remove lock fields, because they are never used in computation,
and because intensional types are only needed in grammars reused
as libraries, not in grammars used at runtime.
</P>
<P>
While the order problem is rather bureaucratic in nature, run-time
variables are an interesting problem. They arise in the presence
of complex parameter values, created by argument-taking constructors
and parameter records. To give an example, consider the GF parameter
type system
</P>
<PRE>
Number = Sg | Pl ;
Person = P1 | P2 | P3 ;
Agr = Ag Number Person ;
</PRE>
<P>
The values can be translated to integers in the expected way,
</P>
<PRE>
Sg = 0, Pl = 1
P1 = 0, P2 = 1, P3 = 2
Ag Sg P1 = 0, Ag Sg P2 = 1, Ag Sg P3 = 2,
Ag Pl P1 = 3, Ag Pl P2 = 4, Ag Pl P3 = 5
</PRE>
<P>
However, an argument of <CODE>Agr</CODE> can be a run-time variable, as in
</P>
<PRE>
Ag np.n P3
</PRE>
<P>
This expression must first be translated to a case expression,
</P>
<PRE>
case np.n of {
0 =&gt; 2 ;
1 =&gt; 5
}
</PRE>
<P>
which can then be translated to the GFCC term
</P>
<PRE>
([2,5] ! ($0 ! $1))
</PRE>
<P>
assuming that the variable <CODE>np</CODE> is the first argument and that its
<CODE>Number</CODE> field is the second in the record.
</P>
<P>
This transformation of course has to be performed recursively, since
there can be several run-time variables in a parameter value:
</P>
<PRE>
Ag np.n np.p
</PRE>
<P>
A similar transformation would be possible to deal with the double
role of parameter records discussed above. Thus the type
</P>
<PRE>
RNP = {n : Number ; p : Person}
</PRE>
<P>
could be uniformly translated into the set <CODE>{0,1,2,3,4,5}</CODE>
as <CODE>Agr</CODE> above. Selections would be simple instances of indexing.
But any projection from the record should be translated into
a case expression,
</P>
<PRE>
rnp.n ===&gt;
case rnp of {
0 =&gt; 0 ;
1 =&gt; 0 ;
2 =&gt; 0 ;
3 =&gt; 1 ;
4 =&gt; 1 ;
5 =&gt; 1
}
</PRE>
<P>
To avoid the code bloat resulting from this, we have chosen to
deal with records by a <B>currying</B> transformation:
</P>
<PRE>
table {n : Number ; p : Person} {... ...}
===&gt;
table Number {Sg =&gt; table Person {...} ; table Person {...}}
</PRE>
<P>
This is performed when GFCC is generated. Selections with
records have to be treated likewise,
</P>
<PRE>
t ! r ===&gt; t ! r.n ! r.p
</PRE>
<P></P>
<H3>The representation of linearization types</H3>
<P>
Linearization types (<CODE>lincat</CODE>) are not needed when generating with
GFCC, but they have been added to enable parser generation directly from
GFCC. The linearization type definitions are shown as a part of the
concrete syntax, by using terms to represent types. Here is the table
showing how different linearization types are encoded.
</P>
<PRE>
P* = max(P) -- parameter type
{r1 : T1 ; ... ; rn : Tn}* = [T1*,...,Tn*] -- record
(P =&gt; T)* = [T* ,...,T*] -- table, size(P) cases
Str* = ()
</PRE>
<P>
For example, the linearization type <CODE>present/CatEng.NP</CODE> is
translated as follows:
</P>
<PRE>
NP = {
a : { -- 6 = 2*3 values
n : {ParamX.Number} ; -- 2 values
p : {ParamX.Person} -- 3 values
} ;
s : {ResEng.Case} =&gt; Str -- 3 values
}
__NP = [[1,2],[(),(),()]]
</PRE>
<P></P>
<H3>Running the compiler and the GFCC interpreter</H3>
<P>
GFCC generation is a part of the
<A HREF="http://www.cs.chalmers.se/Cs/Research/Language-technology/darcs/GF/doc/darcs.html">developers' version</A>
of GF since September 2006. To invoke the compiler, the flag
<CODE>-printer=gfcc</CODE> to the command
<CODE>pm = print_multi</CODE> is used. It is wise to recompile the grammar from
source, since previously compiled libraries may not obey the canonical
order of records.
Here is an example, performed in
<A HREF="../../../../../examples/bronzeage">example/bronzeage</A>.
</P>
<PRE>
i -src -path=.:prelude:resource-1.0/* -optimize=all_subs BronzeageEng.gf
i -src -path=.:prelude:resource-1.0/* -optimize=all_subs BronzeageGer.gf
strip
pm -printer=gfcc | wf bronze.gfcc
</PRE>
<P>
There is also an experimental batch compiler, which does not use the GFC
format or the record aliases. It can be produced by
</P>
<PRE>
make gfc
</PRE>
<P>
in <CODE>GF/src</CODE>, and invoked by
</P>
<PRE>
gfc --make FILES
</PRE>
<P></P>
<H2>The reference interpreter</H2>
<P>
The reference interpreter written in Haskell consists of the following files:
</P>
<PRE>
-- source file for BNFC
GFCC.cf -- labelled BNF grammar of gfcc
-- files generated by BNFC
AbsGFCC.hs -- abstract syntax datatypes
ErrM.hs -- error monad used internally
LexGFCC.hs -- lexer of gfcc files
ParGFCC.hs -- parser of gfcc files and syntax trees
PrintGFCC.hs -- printer of gfcc files and syntax trees
-- hand-written files
DataGFCC.hs -- grammar datatype, post-parser grammar creation
Linearize.hs -- linearization and evaluation
Macros.hs -- utilities abstracting away from GFCC datatypes
Generate.hs -- random and exhaustive generation, generate-and-test parsing
API.hs -- functionalities accessible in embedded GF applications
Generate.hs -- random and exhaustive generation
Shell.hs -- main function - a simple command interpreter
</PRE>
<P>
It is included in the
<A HREF="http://www.cs.chalmers.se/Cs/Research/Language-technology/darcs/GF/doc/darcs.html">developers' version</A>
of GF, in the subdirectories <A HREF="../"><CODE>GF/src/GF/GFCC</CODE></A> and
<A HREF="../../Devel"><CODE>GF/src/GF/Devel</CODE></A>.
</P>
<P>
As of September 2007, default parsing in main GF uses GFCC (implemented by Krasimir
Angelov). The interpreter uses the relevant modules
</P>
<PRE>
GF/Conversions/SimpleToFCFG.hs -- generate parser from GFCC
GF/Parsing/FCFG.hs -- run the parser
</PRE>
<P></P>
<P>
To compile the interpreter, type
</P>
<PRE>
make gfcc
</PRE>
<P>
in <CODE>GF/src</CODE>. To run it, type
</P>
<PRE>
./gfcc &lt;GFCC-file&gt;
</PRE>
<P>
The available commands are
</P>
<UL>
<LI><CODE>gr &lt;Cat&gt; &lt;Int&gt;</CODE>: generate a number of random trees in category.
and show their linearizations in all languages
<LI><CODE>grt &lt;Cat&gt; &lt;Int&gt;</CODE>: generate a number of random trees in category.
and show the trees and their linearizations in all languages
<LI><CODE>gt &lt;Cat&gt; &lt;Int&gt;</CODE>: generate a number of trees in category from smallest,
and show their linearizations in all languages
<LI><CODE>gtt &lt;Cat&gt; &lt;Int&gt;</CODE>: generate a number of trees in category from smallest,
and show the trees and their linearizations in all languages
<LI><CODE>p &lt;Lang&gt; &lt;Cat&gt; &lt;String&gt;</CODE>: parse a string into a set of trees
<LI><CODE>lin &lt;Tree&gt;</CODE>: linearize tree in all languages, also showing full records
<LI><CODE>q</CODE>: terminate the system cleanly
</UL>
<H2>Embedded formats</H2>
<UL>
<LI>JavaScript: compiler of linearization and abstract syntax
<P></P>
<LI>Haskell: compiler of abstract syntax and interpreter with parsing,
linearization, and generation
<P></P>
<LI>C: compiler of linearization (old GFCC)
<P></P>
<LI>C++: embedded interpreter supporting linearization (old GFCC)
</UL>
<H2>Some things to do</H2>
<P>
Support for dependent types, higher-order abstract syntax, and
semantic definition in GFCC generation and interpreters.
</P>
<P>
Replacing the entire GF shell by one based on GFCC.
</P>
<P>
Interpreter in Java.
</P>
<P>
Hand-written parsers for GFCC grammars to reduce code size
(and efficiency?) of interpreters.
</P>
<P>
Binary format and/or file compression of GFCC output.
</P>
<P>
Syntax editor based on GFCC.
</P>
<P>
Rewriting of resource libraries in order to exploit the
word-suffix sharing better (depth-one tables, as in FM).
</P>
<!-- html code generated by txt2tags 2.3 (http://txt2tags.sf.net) -->
<!-- cmdline: txt2tags -thtml gfcc.txt -->
</BODY></HTML>

View File

@@ -1,712 +0,0 @@
The GFCC Grammar Format
Aarne Ranta
December 14, 2007
Author's address:
[``http://www.cs.chalmers.se/~aarne`` http://www.cs.chalmers.se/~aarne]
% to compile: txt2tags -thtml --toc gfcc.txt
History:
- 14 Dec 2007: simpler, Lisp-like concrete syntax of GFCC
- 5 Oct 2007: new, better structured GFCC with full expressive power
- 19 Oct: translation of lincats, new figures on C++
- 3 Oct 2006: first version
==What is GFCC==
GFCC is a low-level format for GF grammars. Its aim is to contain the minimum
that is needed to process GF grammars at runtime. This minimality has three
advantages:
- compact grammar files and run-time objects
- time and space efficient processing
- simple definition of interpreters
Thus we also want to call GFCC the **portable grammar format**.
The idea is that all embedded GF applications use GFCC.
The GF system would be primarily used as a compiler and as a grammar
development tool.
Since GFCC is implemented in BNFC, a parser of the format is readily
available for C, C++, C#, Haskell, Java, and OCaml. Also an XML
representation can be generated in BNFC. A
[reference implementation ../]
of linearization and some other functions has been written in Haskell.
==GFCC vs. GFC==
GFCC is aimed to replace GFC as the run-time grammar format. GFC was designed
to be a run-time format, but also to
support separate compilation of grammars, i.e.
to store the results of compiling
individual GF modules. But this means that GFC has to contain extra information,
such as type annotations, which is only needed in compilation and not at
run-time. In particular, the pattern matching syntax and semantics of GFC is
complex and therefore difficult to implement in new platforms.
Actually, GFC is planned to be omitted also as the target format of
separate compilation, where plain GF (type annotated and partially evaluated)
will be used instead. GFC provides only marginal advantages as a target format
compared with GF, and it is therefore just extra weight to carry around this
format.
The main differences of GFCC compared with GFC (and GF) can be
summarized as follows:
- there are no modules, and therefore no qualified names
- a GFCC grammar is multilingual, and consists of a common abstract syntax
together with one concrete syntax per language
- records and tables are replaced by arrays
- record labels and parameter values are replaced by integers
- record projection and table selection are replaced by array indexing
- even though the format does support dependent types and higher-order abstract
  syntax, there is no interpreter yet that handles them
Here is an example of a GF grammar, consisting of three modules,
as translated to GFCC. The representations are aligned;
thus they do not completely
reflect the order of judgements in GFCC files, which have different orders of
blocks of judgements, and alphabetical sorting.
```
grammar Ex(Eng,Swe);
abstract Ex = { abstract {
cat cat
S ; NP ; VP ; NP[]; S[]; VP[];
fun fun
Pred : NP -> VP -> S ; Pred=[(($ 0! 1),(($ 1! 0)!($ 0! 0)))];
She, They : NP ; She=[0,"she"];
Sleep : VP ; They=[1,"they"];
Sleep=[["sleeps","sleep"]];
} } ;
concrete Eng of Ex = { concrete Eng {
lincat lincat
S = {s : Str} ; S=[()];
NP = {s : Str ; n : Num} ; NP=[1,()];
VP = {s : Num => Str} ; VP=[[(),()]];
param
Num = Sg | Pl ;
lin lin
Pred np vp = { Pred=[(($ 0! 1),(($ 1! 0)!($ 0! 0)))];
s = np.s ++ vp.s ! np.n} ;
She = {s = "she" ; n = Sg} ; She=[0,"she"];
They = {s = "they" ; n = Pl} ; They = [1, "they"];
Sleep = {s = table { Sleep=[["sleeps","sleep"]];
Sg => "sleeps" ;
Pl => "sleep"
}
} ;
} } ;
concrete Swe of Ex = { concrete Swe {
lincat lincat
S = {s : Str} ; S=[()];
NP = {s : Str} ; NP=[()];
VP = {s : Str} ; VP=[()];
param
Num = Sg | Pl ;
lin lin
Pred np vp = { Pred = [(($0!0),($1!0))];
s = np.s ++ vp.s} ;
She = {s = "hon"} ; She = ["hon"];
They = {s = "de"} ; They = ["de"];
Sleep = {s = "sover"} ; Sleep = ["sover"];
} } ;
```
==The syntax of GFCC files==
The complete BNFC grammar, from which
the rules in this section are taken, is in the file
[``GF/GFCC/GFCC.cf`` ../DataGFCC.cf].
===Top level===
A grammar has a header telling the name of the abstract syntax
(often specifying an application domain), and the names of
the concrete languages. The abstract syntax and the concrete
syntaxes themselves follow.
```
Grm. Grammar ::=
"grammar" CId "(" [CId] ")" ";"
Abstract ";"
[Concrete] ;
Abs. Abstract ::=
"abstract" "{"
"flags" [Flag]
"fun" [FunDef]
"cat" [CatDef]
"}" ;
Cnc. Concrete ::=
"concrete" CId "{"
"flags" [Flag]
"lin" [LinDef]
"oper" [LinDef]
"lincat" [LinDef]
"lindef" [LinDef]
"printname" [LinDef]
"}" ;
```
This syntax organizes each module to a sequence of **fields**, such
as flags, linearizations, operations, linearization types, etc.
It is envisaged that particular applications can ignore some
of the fields, typically so that earlier fields are more
important than later ones.
The judgement forms have the following syntax.
```
Flg. Flag ::= CId "=" String ;
Cat. CatDef ::= CId "[" [Hypo] "]" ;
Fun. FunDef ::= CId ":" Type "=" Exp ;
Lin. LinDef ::= CId "=" Term ;
```
For the run-time system, the reference implementation in Haskell
uses a structure that gives efficient look-up:
```
data GFCC = GFCC {
absname :: CId ,
cncnames :: [CId] ,
abstract :: Abstr ,
concretes :: Map CId Concr
}
data Abstr = Abstr {
aflags :: Map CId String, -- value of a flag
funs :: Map CId (Type,Exp), -- type and def of a fun
cats :: Map CId [Hypo], -- context of a cat
catfuns :: Map CId [CId] -- funs yielding a cat (redundant, for fast lookup)
}
data Concr = Concr {
flags :: Map CId String, -- value of a flag
lins :: Map CId Term, -- lin of a fun
opers :: Map CId Term, -- oper generated by subex elim
lincats :: Map CId Term, -- lin type of a cat
lindefs :: Map CId Term, -- lin default of a cat
printnames :: Map CId Term -- printname of a cat or a fun
}
```
These definitions are from [``GF/GFCC/DataGFCC.hs`` ../DataGFCC.hs].
Identifiers (``CId``) are like ``Ident`` in GF, except that
the compiler produces constants prefixed with ``_`` in
the common subterm elimination optimization.
```
token CId (('_' | letter) (letter | digit | '\'' | '_')*) ;
```
===Abstract syntax===
Types are first-order function types built from argument type
contexts and value category symbols. Syntax trees (``Exp``) are
rose trees with nodes consisting of a head (``Atom``) and
bound variables (``CId``).
```
DTyp. Type ::= "[" [Hypo] "]" CId [Exp] ;
DTr. Exp ::= "[" "(" [CId] ")" Atom [Exp] "]" ;
Hyp. Hypo ::= CId ":" Type ;
```
The head Atom is either a function
constant, a bound variable, or a metavariable, or a string, integer, or float
literal.
```
AC. Atom ::= CId ;
AS. Atom ::= String ;
AI. Atom ::= Integer ;
AF. Atom ::= Double ;
AM. Atom ::= "?" Integer ;
```
The context-free types and trees of the "old GFCC" are special
cases, which can be defined as follows:
```
Typ. Type ::= [CId] "->" CId
Typ args val = DTyp [Hyp (CId "_") arg | arg <- args] val
Tr. Exp ::= "(" CId [Exp] ")"
Tr fun exps = DTr [] fun exps
```
To store semantic (``def``) definitions by cases, the following expression
form is provided, but it is only meaningful in the last field of a function
declaration in an abstract syntax:
```
EEq. Exp ::= "{" [Equation] "}" ;
Equ. Equation ::= [Exp] "->" Exp ;
```
Notice that expressions are used to encode patterns. Primitive notions
(the default semantics in GF) are encoded as empty sets of equations
(``[]``). For a constructor (canonical form) of a category ``C``, we
aim to use the encoding as the application ``(_constr C)``.
===Concrete syntax===
Linearization terms (``Term``) are built as follows.
Constructor names are shown to make the later code
examples readable.
```
R. Term ::= "[" [Term] "]" ; -- array (record/table)
P. Term ::= "(" Term "!" Term ")" ; -- access to field (projection/selection)
S. Term ::= "(" [Term] ")" ; -- concatenated sequence
K. Term ::= Tokn ; -- token
V. Term ::= "$" Integer ; -- argument (subtree)
C. Term ::= Integer ; -- array index (label/parameter value)
FV. Term ::= "[|" [Term] "|]" ; -- free variation
TM. Term ::= "?" ; -- linearization of metavariable
```
Tokens are strings or (maybe obsolescent) prefix-dependent
variant lists.
```
KS. Tokn ::= String ;
KP. Tokn ::= "[" "pre" [String] "[" [Variant] "]" "]" ;
Var. Variant ::= [String] "/" [String] ;
```
Two special forms of terms are introduced by the compiler
as optimizations. They can in principle be eliminated, but
their presence makes grammars much more compact. Their semantics
will be explained in a later section.
```
F. Term ::= CId ; -- global constant
W. Term ::= "(" String "+" Term ")" ; -- prefix + suffix table
```
There is also a deprecated form of "record parameter alias",
```
RP. Term ::= "(" Term "@" Term ")"; -- DEPRECATED
```
which will be removed when the migration to new GFCC is complete.
==The semantics of concrete syntax terms==
The code in this section is from [``GF/GFCC/Linearize.hs`` ../Linearize.hs].
===Linearization and realization===
The linearization algorithm is essentially the same as in
GFC: a tree is linearized by evaluating its linearization term
in the environment of the linearizations of the subtrees.
Literal atoms are linearized in the obvious way.
The function also needs to know the language (i.e. concrete syntax)
in which linearization is performed.
```
linExp :: GFCC -> CId -> Exp -> Term
linExp gfcc lang tree@(DTr _ at trees) = case at of
AC fun -> comp (Prelude.map lin trees) $ look fun
AS s -> R [kks (show s)] -- quoted
AI i -> R [kks (show i)]
AF d -> R [kks (show d)]
AM -> TM
where
lin = linExp gfcc lang
comp = compute gfcc lang
look = lookLin gfcc lang
```
TODO: bindings must be supported.
The result of linearization is usually a record, which is realized as
a string using the following algorithm.
```
realize :: Term -> String
realize trm = case trm of
R (t:_) -> realize t
S ss -> unwords $ Prelude.map realize ss
K (KS s) -> s
K (KP s _) -> unwords s ---- prefix choice TODO
W s t -> s ++ realize t
FV (t:_) -> realize t
TM -> "?"
```
Notice that realization always picks the first field of a record.
If a linearization type has more than one field, the first field
does not necessarily contain the desired string.
Also notice that the order of record fields in GFCC is not necessarily
the same as in GF source.
===Term evaluation===
Evaluation follows call-by-value order, with two environments
needed:
- the grammar (a concrete syntax) to give the global constants
- an array of terms to give the subtree linearizations
The code is presented in one-level pattern matching, to
enable reimplementations in languages that do not permit
deep patterns (such as Java and C++).
```
compute :: GFCC -> CId -> [Term] -> Term -> Term
compute gfcc lang args = comp where
comp trm = case trm of
P r p -> proj (comp r) (comp p)
W s t -> W s (comp t)
R ts -> R $ Prelude.map comp ts
V i -> idx args (fromInteger i) -- already computed
F c -> comp $ look c -- not computed (if contains V)
FV ts -> FV $ Prelude.map comp ts
S ts -> S $ Prelude.filter (/= S []) $ Prelude.map comp ts
_ -> trm
look = lookOper gfcc lang
idx xs i = xs !! i
proj r p = case (r,p) of
(_, FV ts) -> FV $ Prelude.map (proj r) ts
(FV ts, _ ) -> FV $ Prelude.map (\t -> proj t p) ts
(W s t, _) -> kks (s ++ getString (proj t p))
_ -> comp $ getField r (getIndex p)
getString t = case t of
K (KS s) -> s
_ -> trace ("ERROR in grammar compiler: string from "++ show t) "ERR"
getIndex t = case t of
C i -> fromInteger i
RP p _ -> getIndex p
TM -> 0 -- default value for parameter
_ -> trace ("ERROR in grammar compiler: index from " ++ show t) 0
getField t i = case t of
R rs -> idx rs i
RP _ r -> getField r i
TM -> TM
_ -> trace ("ERROR in grammar compiler: field from " ++ show t) t
```
===The special term constructors===
The two special forms introduced by the compiler may need a special
explanation.
Global constants
```
Term ::= CId ;
```
are shorthands for complex terms. They are produced by the
compiler by (iterated) **common subexpression elimination**.
They are often more powerful than hand-devised code sharing in the source
code. They could be computed off-line by replacing each identifier by
its definition.
**Prefix-suffix tables**
```
Term ::= "(" String "+" Term ")" ;
```
represent tables of word forms divided to the longest common prefix
and its array of suffixes. In the example grammar above, we have
```
Sleep = [("sleep" + ["s",""])]
```
which in fact is equal to the array of full forms
```
["sleeps", "sleep"]
```
The power of this construction comes from the fact that suffix sets
tend to be repeated in a language, and can therefore be collected
by common subexpression elimination. It is this technique that
explains the used syntax rather than the more accurate
```
"(" String "+" [String] ")"
```
since we want the suffix part to be a ``Term`` for the optimization to
take effect.
==Compiling to GFCC==
Compilation to GFCC is performed by the GF grammar compiler, and
GFCC interpreters need not know what it does. For grammar writers,
however, it might be interesting to know what happens to the grammars
in the process.
The compilation phases are the following
+ type check and partially evaluate GF source
+ create a symbol table mapping the GF parameter and record types to
fixed-size arrays, and parameter values and record labels to integers
+ traverse the linearization rules replacing parameters and labels by integers
+ reorganize the created GF grammar so that it has just one abstract syntax
and one concrete syntax per language
+ TODO: apply UTF8 encoding to the grammar, if not yet applied (this is told by the
``coding`` flag)
+ translate the GF grammar object to a GFCC grammar object, using a simple
compositional mapping
+ perform the word-suffix optimization on GFCC linearization terms
+ perform subexpression elimination on each concrete syntax module
+ print out the GFCC code
===Problems in GFCC compilation===
Two major problems had to be solved in compiling GF to GFCC:
- consistent order of tables and records, to permit the array translation
- run-time variables in complex parameter values.
The current implementation is still experimental and may fail
to generate correct code. Any errors remaining are likely to be
related to the two problems just mentioned.
The order problem is solved in slightly different ways for tables and records.
In both cases, **eta expansion** is used to establish a
canonical order. Tables are ordered by applying the preorder induced
by ``param`` definitions. Records are ordered by sorting them by labels.
This means that
e.g. the ``s`` field will in general no longer appear as the first
field, even if it does so in the GF source code. But relying on the
order of fields in a labelled record would be misplaced anyway.
The canonical form of records is further complicated by lock fields,
i.e. dummy fields of form ``lock_C = <>``, which are added to grammar
libraries to force intensionality of linearization types. The problem
is that the absence of a lock field only generates a warning, not
an error. Therefore a GF grammar can contain objects of the same
type with and without a lock field. This problem was solved in GFCC
generation by just removing all lock fields (defined as fields whose
type is the empty record type). This has the further advantage of
(slightly) reducing the grammar size. More importantly, it is safe
to remove lock fields, because they are never used in computation,
and because intensional types are only needed in grammars reused
as libraries, not in grammars used at runtime.
While the order problem is rather bureaucratic in nature, run-time
variables are an interesting problem. They arise in the presence
of complex parameter values, created by argument-taking constructors
and parameter records. To give an example, consider the GF parameter
type system
```
Number = Sg | Pl ;
Person = P1 | P2 | P3 ;
Agr = Ag Number Person ;
```
The values can be translated to integers in the expected way,
```
Sg = 0, Pl = 1
P1 = 0, P2 = 1, P3 = 2
Ag Sg P1 = 0, Ag Sg P2 = 1, Ag Sg P3 = 2,
Ag Pl P1 = 3, Ag Pl P2 = 4, Ag Pl P3 = 5
```
However, an argument of ``Agr`` can be a run-time variable, as in
```
Ag np.n P3
```
This expression must first be translated to a case expression,
```
case np.n of {
0 => 2 ;
1 => 5
}
```
which can then be translated to the GFCC term
```
([2,5] ! ($0 ! $1))
```
assuming that the variable ``np`` is the first argument and that its
``Number`` field is the second in the record.
This transformation of course has to be performed recursively, since
there can be several run-time variables in a parameter value:
```
Ag np.n np.p
```
A similar transformation would be possible to deal with the double
role of parameter records discussed above. Thus the type
```
RNP = {n : Number ; p : Person}
```
could be uniformly translated into the set ``{0,1,2,3,4,5}``
as ``Agr`` above. Selections would be simple instances of indexing.
But any projection from the record should be translated into
a case expression,
```
rnp.n ===>
case rnp of {
0 => 0 ;
1 => 0 ;
2 => 0 ;
3 => 1 ;
4 => 1 ;
5 => 1
}
```
To avoid the code bloat resulting from this, we have chosen to
deal with records by a **currying** transformation:
```
table {n : Number ; p : Person} {... ...}
===>
table Number {Sg => table Person {...} ; table Person {...}}
```
This is performed when GFCC is generated. Selections with
records have to be treated likewise,
```
t ! r ===> t ! r.n ! r.p
```
===The representation of linearization types===
Linearization types (``lincat``) are not needed when generating with
GFCC, but they have been added to enable parser generation directly from
GFCC. The linearization type definitions are shown as a part of the
concrete syntax, by using terms to represent types. Here is the table
showing how different linearization types are encoded.
```
P* = max(P) -- parameter type
{r1 : T1 ; ... ; rn : Tn}* = [T1*,...,Tn*] -- record
(P => T)* = [T* ,...,T*] -- table, size(P) cases
Str* = ()
```
For example, the linearization type ``present/CatEng.NP`` is
translated as follows:
```
NP = {
a : { -- 6 = 2*3 values
n : {ParamX.Number} ; -- 2 values
p : {ParamX.Person} -- 3 values
} ;
s : {ResEng.Case} => Str -- 3 values
}
__NP = [[1,2],[(),(),()]]
```
===Running the compiler and the GFCC interpreter===
GFCC generation is a part of the
[developers' version http://www.cs.chalmers.se/Cs/Research/Language-technology/darcs/GF/doc/darcs.html]
of GF since September 2006. To invoke the compiler, the flag
``-printer=gfcc`` to the command
``pm = print_multi`` is used. It is wise to recompile the grammar from
source, since previously compiled libraries may not obey the canonical
order of records.
Here is an example, performed in
[example/bronzeage ../../../../../examples/bronzeage].
```
i -src -path=.:prelude:resource-1.0/* -optimize=all_subs BronzeageEng.gf
i -src -path=.:prelude:resource-1.0/* -optimize=all_subs BronzeageGer.gf
strip
pm -printer=gfcc | wf bronze.gfcc
```
There is also an experimental batch compiler, which does not use the GFC
format or the record aliases. It can be produced by
```
make gfc
```
in ``GF/src``, and invoked by
```
gfc --make FILES
```
==The reference interpreter==
The reference interpreter written in Haskell consists of the following files:
```
-- source file for BNFC
GFCC.cf -- labelled BNF grammar of gfcc
-- files generated by BNFC
AbsGFCC.hs -- abstract syntax datatypes
ErrM.hs -- error monad used internally
LexGFCC.hs -- lexer of gfcc files
ParGFCC.hs -- parser of gfcc files and syntax trees
PrintGFCC.hs -- printer of gfcc files and syntax trees
-- hand-written files
DataGFCC.hs -- grammar datatype, post-parser grammar creation
Linearize.hs -- linearization and evaluation
Macros.hs -- utilities abstracting away from GFCC datatypes
Generate.hs -- random and exhaustive generation, generate-and-test parsing
API.hs -- functionalities accessible in embedded GF applications
Generate.hs -- random and exhaustive generation
Shell.hs -- main function - a simple command interpreter
```
It is included in the
[developers' version http://www.cs.chalmers.se/Cs/Research/Language-technology/darcs/GF/doc/darcs.html]
of GF, in the subdirectories [``GF/src/GF/GFCC`` ../] and
[``GF/src/GF/Devel`` ../../Devel].
As of September 2007, default parsing in main GF uses GFCC (implemented by Krasimir
Angelov). The interpreter uses the relevant modules
```
GF/Conversions/SimpleToFCFG.hs -- generate parser from GFCC
GF/Parsing/FCFG.hs -- run the parser
```
To compile the interpreter, type
```
make gfcc
```
in ``GF/src``. To run it, type
```
./gfcc <GFCC-file>
```
The available commands are
- ``gr <Cat> <Int>``: generate a number of random trees in category.
and show their linearizations in all languages
- ``grt <Cat> <Int>``: generate a number of random trees in category.
and show the trees and their linearizations in all languages
- ``gt <Cat> <Int>``: generate a number of trees in category from smallest,
and show their linearizations in all languages
- ``gtt <Cat> <Int>``: generate a number of trees in category from smallest,
and show the trees and their linearizations in all languages
- ``p <Lang> <Cat> <String>``: parse a string into a set of trees
- ``lin <Tree>``: linearize tree in all languages, also showing full records
- ``q``: terminate the system cleanly
==Embedded formats==
- JavaScript: compiler of linearization and abstract syntax
- Haskell: compiler of abstract syntax and interpreter with parsing,
linearization, and generation
- C: compiler of linearization (old GFCC)
- C++: embedded interpreter supporting linearization (old GFCC)
==Some things to do==
Support for dependent types, higher-order abstract syntax, and
semantic definition in GFCC generation and interpreters.
Replacing the entire GF shell by one based on GFCC.
Interpreter in Java.
Hand-written parsers for GFCC grammars to reduce code size
(and efficiency?) of interpreters.
Binary format and/or file compression of GFCC output.
Syntax editor based on GFCC.
Rewriting of resource libraries in order to exploit the
word-suffix sharing better (depth-one tables, as in FM).

View File

@@ -1,50 +0,0 @@
Grm. Grammar ::= Header ";" Abstract ";" [Concrete] ;
Hdr. Header ::= "grammar" CId "(" [CId] ")" ;
Abs. Abstract ::= "abstract" "{" [AbsDef] "}" ;
Cnc. Concrete ::= "concrete" CId "{" [CncDef] "}" ;
Fun. AbsDef ::= CId ":" Type "=" Exp ;
--AFl. AbsDef ::= "%" CId "=" String ; -- flag
Lin. CncDef ::= CId "=" Term ;
--CFl. CncDef ::= "%" CId "=" String ; -- flag
Typ. Type ::= [CId] "->" CId ;
Tr. Exp ::= "(" Atom [Exp] ")" ;
AC. Atom ::= CId ;
AS. Atom ::= String ;
AI. Atom ::= Integer ;
AF. Atom ::= Double ;
AM. Atom ::= "?" ;
trA. Exp ::= Atom ;
define trA a = Tr a [] ;
R. Term ::= "[" [Term] "]" ; -- record/table
P. Term ::= "(" Term "!" Term ")" ; -- projection/selection
S. Term ::= "(" [Term] ")" ; -- sequence with ++
K. Term ::= Tokn ; -- token
V. Term ::= "$" Integer ; -- argument
C. Term ::= Integer ; -- parameter value/label
F. Term ::= CId ; -- global constant
FV. Term ::= "[|" [Term] "|]" ; -- free variation
W. Term ::= "(" String "+" Term ")" ; -- prefix + suffix table
RP. Term ::= "(" Term "@" Term ")"; -- record parameter alias
TM. Term ::= "?" ; -- lin of metavariable
L. Term ::= "(" CId "->" Term ")" ; -- lambda abstracted table
BV. Term ::= "#" CId ; -- lambda-bound variable
KS. Tokn ::= String ;
KP. Tokn ::= "[" "pre" [String] "[" [Variant] "]" "]" ;
Var. Variant ::= [String] "/" [String] ;
terminator Concrete ";" ;
terminator AbsDef ";" ;
terminator CncDef ";" ;
separator CId "," ;
separator Term "," ;
terminator Exp "" ;
terminator String "" ;
separator Variant "," ;
token CId (('_' | letter) (letter | digit | '\'' | '_')*) ;

View File

@@ -1,656 +0,0 @@
The GFCC Grammar Format
Aarne Ranta
October 19, 2006
Author's address:
[``http://www.cs.chalmers.se/~aarne`` http://www.cs.chalmers.se/~aarne]
% to compile: txt2tags -thtml --toc gfcc.txt
History:
- 19 Oct: translation of lincats, new figures on C++
- 3 Oct 2006: first version
==What is GFCC==
GFCC is a low-level format for GF grammars. Its aim is to contain the minimum
that is needed to process GF grammars at runtime. This minimality has three
advantages:
- compact grammar files and run-time objects
- time and space efficient processing
- simple definition of interpreters
The idea is that all embedded GF applications are compiled to GFCC.
The GF system would be primarily used as a compiler and as a grammar
development tool.
Since GFCC is implemented in BNFC, a parser of the format is readily
available for C, C++, Haskell, Java, and OCaml. Also an XML
representation is generated in BNFC. A
[reference implementation ../]
of linearization and some other functions has been written in Haskell.
==GFCC vs. GFC==
GFCC is aimed to replace GFC as the run-time grammar format. GFC was designed
to be a run-time format, but also to
support separate compilation of grammars, i.e.
to store the results of compiling
individual GF modules. But this means that GFC has to contain extra information,
such as type annotations, which is only needed in compilation and not at
run-time. In particular, the pattern matching syntax and semantics of GFC is
complex and therefore difficult to implement in new platforms.
The main differences of GFCC compared with GFC can be summarized as follows:
- there are no modules, and therefore no qualified names
- a GFCC grammar is multilingual, and consists of a common abstract syntax
together with one concrete syntax per language
- records and tables are replaced by arrays
- record labels and parameter values are replaced by integers
- record projection and table selection are replaced by array indexing
- there is (so far) no support for dependent types or higher-order abstract
syntax (which would be easy to add, but make interpreters much more difficult
to write)
Here is an example of a GF grammar, consisting of three modules,
as translated to GFCC. The representations are aligned, with the exceptions
due to the alphabetical sorting of GFCC grammars.
```
grammar Ex(Eng,Swe);
abstract Ex = { abstract {
cat
S ; NP ; VP ;
fun
Pred : NP -> VP -> S ; Pred : NP,VP -> S = (Pred);
She, They : NP ; She : -> NP = (She);
Sleep : VP ; Sleep : -> VP = (Sleep);
They : -> NP = (They);
} } ;
concrete Eng of Ex = { concrete Eng {
lincat
S = {s : Str} ;
NP = {s : Str ; n : Num} ;
VP = {s : Num => Str} ;
param
Num = Sg | Pl ;
lin
Pred np vp = { Pred = [(($0!1),(($1!0)!($0!0)))];
s = np.s ++ vp.s ! np.n} ;
She = {s = "she" ; n = Sg} ; She = [0, "she"];
They = {s = "they" ; n = Pl} ;
Sleep = {s = table { Sleep = [("sleep" + ["s",""])];
Sg => "sleeps" ;
Pl => "sleep" They = [1, "they"];
} } ;
} ;
}
concrete Swe of Ex = { concrete Swe {
lincat
S = {s : Str} ;
NP = {s : Str} ;
VP = {s : Str} ;
param
Num = Sg | Pl ;
lin
Pred np vp = { Pred = [(($0!0),($1!0))];
s = np.s ++ vp.s} ;
She = {s = "hon"} ; She = ["hon"];
They = {s = "de"} ; They = ["de"];
Sleep = {s = "sover"} ; Sleep = ["sover"];
} } ;
```
==The syntax of GFCC files==
===Top level===
A grammar has a header telling the name of the abstract syntax
(often specifying an application domain), and the names of
the concrete languages. The abstract syntax and the concrete
syntaxes themselves follow.
```
Grammar ::= Header ";" Abstract ";" [Concrete] ;
Header ::= "grammar" CId "(" [CId] ")" ;
Abstract ::= "abstract" "{" [AbsDef] "}" ;
Concrete ::= "concrete" CId "{" [CncDef] "}" ;
```
Abstract syntax judgements give typings and semantic definitions.
Concrete syntax judgements give linearizations.
```
AbsDef ::= CId ":" Type "=" Exp ;
CncDef ::= CId "=" Term ;
```
Also flags are possible, local to each "module" (i.e. abstract and concretes).
```
AbsDef ::= "%" CId "=" String ;
CncDef ::= "%" CId "=" String ;
```
For the run-time system, the reference implementation in Haskell
uses a structure that gives efficient look-up:
```
data GFCC = GFCC {
absname :: CId ,
cncnames :: [CId] ,
abstract :: Abstr ,
concretes :: Map CId Concr
}
data Abstr = Abstr {
funs :: Map CId Type, -- find the type of a fun
cats :: Map CId [CId] -- find the funs giving a cat
}
type Concr = Map CId Term
```
===Abstract syntax===
Types are first-order function types built from
category symbols. Syntax trees (``Exp``) are
rose trees with the head (``Atom``) either a function
constant, a metavariable, or a string, integer, or float
literal.
```
Type ::= [CId] "->" CId ;
Exp ::= "(" Atom [Exp] ")" ;
Atom ::= CId ; -- function constant
Atom ::= "?" ; -- metavariable
Atom ::= String ; -- string literal
Atom ::= Integer ; -- integer literal
Atom ::= Double ; -- float literal
```
===Concrete syntax===
Linearization terms (``Term``) are built as follows.
Constructor names are shown to make the later code
examples readable.
```
R. Term ::= "[" [Term] "]" ; -- array
P. Term ::= "(" Term "!" Term ")" ; -- access to indexed field
S. Term ::= "(" [Term] ")" ; -- sequence with ++
K. Term ::= Tokn ; -- token
V. Term ::= "$" Integer ; -- argument
C. Term ::= Integer ; -- array index
FV. Term ::= "[|" [Term] "|]" ; -- free variation
TM. Term ::= "?" ; -- linearization of metavariable
```
Tokens are strings or (maybe obsolescent) prefix-dependent
variant lists.
```
KS. Tokn ::= String ;
KP. Tokn ::= "[" "pre" [String] "[" [Variant] "]" "]" ;
Var. Variant ::= [String] "/" [String] ;
```
Three special forms of terms are introduced by the compiler
as optimizations. They can in principle be eliminated, but
their presence makes grammars much more compact. Their semantics
will be explained in a later section.
```
F. Term ::= CId ; -- global constant
W. Term ::= "(" String "+" Term ")" ; -- prefix + suffix table
RP. Term ::= "(" Term "@" Term ")"; -- record parameter alias
```
Identifiers are like ``Ident`` in GF and GFC, except that
the compiler produces constants prefixed with ``_`` in
the common subterm elimination optimization.
```
token CId (('_' | letter) (letter | digit | '\'' | '_')*) ;
```
==The semantics of concrete syntax terms==
===Linearization and realization===
The linearization algorithm is essentially the same as in
GFC: a tree is linearized by evaluating its linearization term
in the environment of the linearizations of the subtrees.
Literal atoms are linearized in the obvious way.
The function also needs to know the language (i.e. concrete syntax)
in which linearization is performed.
```
linExp :: GFCC -> CId -> Exp -> Term
linExp mcfg lang tree@(Tr at trees) = case at of
AC fun -> comp (Prelude.map lin trees) $ look fun
AS s -> R [kks (show s)] -- quoted
AI i -> R [kks (show i)]
AF d -> R [kks (show d)]
AM -> TM
where
lin = linExp mcfg lang
comp = compute mcfg lang
look = lookLin mcfg lang
```
The result of linearization is usually a record, which is realized as
a string using the following algorithm.
```
realize :: Term -> String
realize trm = case trm of
R (t:_) -> realize t
S ss -> unwords $ Prelude.map realize ss
K (KS s) -> s
K (KP s _) -> unwords s ---- prefix choice TODO
W s t -> s ++ realize t
FV (t:_) -> realize t
TM -> "?"
```
Since the order of record fields is not necessarily
the same as in GF source,
this realization does not work securely for
categories whose lincats have more than one field.
===Term evaluation===
Evaluation follows call-by-value order, with two environments
needed:
- the grammar (a concrete syntax) to give the global constants
- an array of terms to give the subtree linearizations
The code is presented in one-level pattern matching, to
enable reimplementations in languages that do not permit
deep patterns (such as Java and C++).
```
compute :: GFCC -> CId -> [Term] -> Term -> Term
compute mcfg lang args = comp where
comp trm = case trm of
P r p -> proj (comp r) (comp p)
RP i t -> RP (comp i) (comp t)
W s t -> W s (comp t)
R ts -> R $ Prelude.map comp ts
V i -> idx args (fromInteger i) -- already computed
F c -> comp $ look c -- not computed (if contains V)
FV ts -> FV $ Prelude.map comp ts
S ts -> S $ Prelude.filter (/= S []) $ Prelude.map comp ts
_ -> trm
look = lookLin mcfg lang
idx xs i = xs !! i
proj r p = case (r,p) of
(_, FV ts) -> FV $ Prelude.map (proj r) ts
(W s t, _) -> kks (s ++ getString (proj t p))
_ -> comp $ getField r (getIndex p)
getString t = case t of
K (KS s) -> s
_ -> trace ("ERROR in grammar compiler: string from "++ show t) "ERR"
getIndex t = case t of
C i -> fromInteger i
RP p _ -> getIndex p
TM -> 0 -- default value for parameter
_ -> trace ("ERROR in grammar compiler: index from " ++ show t) 0
getField t i = case t of
R rs -> idx rs i
RP _ r -> getField r i
TM -> TM
_ -> trace ("ERROR in grammar compiler: field from " ++ show t) t
```
===The special term constructors===
The three forms introduced by the compiler may need a special
explanation.
Global constants
```
Term ::= CId ;
```
are shorthands for complex terms. They are produced by the
compiler by (iterated) common subexpression elimination.
They are often more powerful than hand-devised code sharing in the source
code. They could be computed off-line by replacing each identifier by
its definition.
Prefix-suffix tables
```
Term ::= "(" String "+" Term ")" ;
```
represent tables of word forms divided to the longest common prefix
and its array of suffixes. In the example grammar above, we have
```
Sleep = [("sleep" + ["s",""])]
```
which in fact is equal to the array of full forms
```
["sleeps", "sleep"]
```
The power of this construction comes from the fact that suffix sets
tend to be repeated in a language, and can therefore be collected
by common subexpression elimination. It is this technique that
explains the used syntax rather than the more accurate
```
"(" String "+" [String] ")"
```
since we want the suffix part to be a ``Term`` for the optimization to
take effect.
The most curious construct of GFCC is the parameter array alias,
```
Term ::= "(" Term "@" Term ")";
```
This form is used as the value of parameter records, such as the type
```
{n : Number ; p : Person}
```
The problem with parameter records is their double role.
They can be used like parameter values, as indices in selection,
```
VP.s ! {n = Sg ; p = P3}
```
but also as records, from which parameters can be projected:
```
{n = Sg ; p = P3}.n
```
Whichever use is selected as primary, a prohibitively complex
case expression must be generated at compilation to GFCC to get the
other use. The adopted
solution is to generate a pair containing both a parameter value index
and an array of indices of record fields. For instance, if we have
```
param Number = Sg | Pl ; Person = P1 | P2 | P3 ;
```
we get the encoding
```
{n = Sg ; p = P3} ---> (2 @ [0,2])
```
The GFCC computation rules are essentially
```
(t ! (i @ _)) = (t ! i)
((_ @ r) ! j) = (r ! j)
```
==Compiling to GFCC==
Compilation to GFCC is performed by the GF grammar compiler, and
GFCC interpreters need not know what it does. For grammar writers,
however, it might be interesting to know what happens to the grammars
in the process.
The compilation phases are the following
+ translate GF source to GFC, as always in GF
+ undo GFC back-end optimizations
+ perform the ``values`` optimization to normalize tables
+ create a symbol table mapping the GFC parameter and record types to
fixed-size arrays, and parameter values and record labels to integers
+ traverse the linearization rules replacing parameters and labels by integers
+ reorganize the created GFC grammar so that it has just one abstract syntax
and one concrete syntax per language
+ apply UTF8 encoding to the grammar, if not yet applied (this is told by the
``coding`` flag)
+ translate the GFC syntax tree to a GFCC syntax tree, using a simple
compositional mapping
+ perform the word-suffix optimization on GFCC linearization terms
+ perform subexpression elimination on each concrete syntax module
+ print out the GFCC code
Notice that a major part of the compilation is done within GFC, so that
GFC-related tasks (such as parser generation) could be performed by
using the old algorithms.
===Problems in GFCC compilation===
Two major problems had to be solved in compiling GFC to GFCC:
- consistent order of tables and records, to permit the array translation
- run-time variables in complex parameter values.
The current implementation is still experimental and may fail
to generate correct code. Any errors remaining are likely to be
related to the two problems just mentioned.
The order problem is solved in different ways for tables and records.
For tables, the ``values`` optimization of GFC already manages to
maintain a canonical order. But this order can be destroyed by the
``share`` optimization. To make sure that GFCC compilation works properly,
it is safest to recompile the GF grammar by using the ``values``
optimization flag.
Records can be canonically ordered by sorting them by labels.
In fact, this was done in connection with the GFCC work as a part
of the GFC generation, to guarantee consistency. This means that
e.g. the ``s`` field will in general no longer appear as the first
field, even if it does so in the GF source code. But relying on the
order of fields in a labelled record would be misplaced anyway.
The canonical form of records is further complicated by lock fields,
i.e. dummy fields of form ``lock_C = <>``, which are added to grammar
libraries to force intensionality of linearization types. The problem
is that the absence of a lock field only generates a warning, not
an error. Therefore a GFC grammar can contain objects of the same
type with and without a lock field. This problem was solved in GFCC
generation by just removing all lock fields (defined as fields whose
type is the empty record type). This has the further advantage of
(slightly) reducing the grammar size. More importantly, it is safe
to remove lock fields, because they are never used in computation,
and because intensional types are only needed in grammars reused
as libraries, not in grammars used at runtime.
While the order problem is rather bureaucratic in nature, run-time
variables are an interesting problem. They arise in the presence
of complex parameter values, created by argument-taking constructors
and parameter records. To give an example, consider the GF parameter
type system
```
Number = Sg | Pl ;
Person = P1 | P2 | P3 ;
Agr = Ag Number Person ;
```
The values can be translated to integers in the expected way,
```
Sg = 0, Pl = 1
P1 = 0, P2 = 1, P3 = 2
Ag Sg P1 = 0, Ag Sg P2 = 1, Ag Sg P3 = 2,
Ag Pl P1 = 3, Ag Pl P2 = 4, Ag Pl P3 = 5
```
However, an argument of ``Agr`` can be a run-time variable, as in
```
Ag np.n P3
```
This expression must first be translated to a case expression,
```
case np.n of {
0 => 2 ;
1 => 5
}
```
which can then be translated to the GFCC term
```
([2,5] ! ($0 ! $1))
```
assuming that the variable ``np`` is the first argument and that its
``Number`` field is the second in the record.
This transformation of course has to be performed recursively, since
there can be several run-time variables in a parameter value:
```
Ag np.n np.p
```
A similar transformation would be possible to deal with the double
role of parameter records discussed above. Thus the type
```
RNP = {n : Number ; p : Person}
```
could be uniformly translated into the set ``{0,1,2,3,4,5}``
as ``Agr`` above. Selections would be simple instances of indexing.
But any projection from the record should be translated into
a case expression,
```
rnp.n ===>
case rnp of {
0 => 0 ;
1 => 0 ;
2 => 0 ;
3 => 1 ;
4 => 1 ;
5 => 1
}
```
To avoid the code bloat resulting from this, we chose the alias representation
which is easy enough to deal with in interpreters.
===The representation of linearization types===
Linearization types (``lincat``) are not needed when generating with
GFCC, but they have been added to enable parser generation directly from
GFCC. The linearization type definitions are shown as a part of the
concrete syntax, by using terms to represent types. Here is the table
showing how different linearization types are encoded.
```
P* = size(P) -- parameter type
{_ : I ; __ : R}* = (I* @ R*) -- record of parameters
{r1 : T1 ; ... ; rn : Tn}* = [T1*,...,Tn*] -- other record
(P => T)* = [T* ,...,T*] -- size(P) times
Str* = ()
```
The category symbols are prefixed with two underscores (``__``).
For example, the linearization type ``present/CatEng.NP`` is
translated as follows:
```
NP = {
a : { -- 6 = 2*3 values
n : {ParamX.Number} ; -- 2 values
p : {ParamX.Person} -- 3 values
} ;
s : {ResEng.Case} => Str -- 3 values
}
__NP = [(6@[2,3]),[(),(),()]]
```
===Running the compiler and the GFCC interpreter===
GFCC generation is a part of the
[developers' version http://www.cs.chalmers.se/Cs/Research/Language-technology/darcs/GF/doc/darcs.html]
of GF since September 2006. To invoke the compiler, the flag
``-printer=gfcc`` to the command
``pm = print_multi`` is used. It is wise to recompile the grammar from
source, since previously compiled libraries may not obey the canonical
order of records. To ``strip`` the grammar before
GFCC translation removes unnecessary interface references.
Here is an example, performed in
[example/bronzeage ../../../../../examples/bronzeage].
```
i -src -path=.:prelude:resource-1.0/* -optimize=all_subs BronzeageEng.gf
i -src -path=.:prelude:resource-1.0/* -optimize=all_subs BronzeageGer.gf
strip
pm -printer=gfcc | wf bronze.gfcc
```
==The reference interpreter==
The reference interpreter written in Haskell consists of the following files:
```
-- source file for BNFC
GFCC.cf -- labelled BNF grammar of gfcc
-- files generated by BNFC
AbsGFCC.hs   -- abstract syntax of gfcc
ErrM.hs -- error monad used internally
LexGFCC.hs -- lexer of gfcc files
ParGFCC.hs -- parser of gfcc files and syntax trees
PrintGFCC.hs -- printer of gfcc files and syntax trees
-- hand-written files
DataGFCC.hs -- post-parser grammar creation, linearization and evaluation
GenGFCC.hs -- random and exhaustive generation, generate-and-test parsing
RunGFCC.hs -- main function - a simple command interpreter
```
It is included in the
[developers' version http://www.cs.chalmers.se/Cs/Research/Language-technology/darcs/GF/doc/darcs.html]
of GF, in the subdirectory [``GF/src/GF/Canon/GFCC`` ../].
To compile the interpreter, type
```
make gfcc
```
in ``GF/src``. To run it, type
```
./gfcc <GFCC-file>
```
The available commands are
- ``gr <Cat> <Int>``: generate a number of random trees in category.
and show their linearizations in all languages
- ``grt <Cat> <Int>``: generate a number of random trees in category.
and show the trees and their linearizations in all languages
- ``gt <Cat> <Int>``: generate a number of trees in category from smallest,
and show their linearizations in all languages
- ``gtt <Cat> <Int>``: generate a number of trees in category from smallest,
and show the trees and their linearizations in all languages
- ``p <Int> <Cat> <String>``: "parse", i.e. generate trees until match or
until the given number have been generated
- ``<Tree>``: linearize tree in all languages, also showing full records
- ``quit``: terminate the system cleanly
==Interpreter in C++==
A base-line interpreter in C++ has been started.
Its main functionality is random generation of trees and linearization of them.
Here are some results from running the different interpreters, compared
to running the same grammar in GF, saved in ``.gfcm`` format.
The grammar contains the English, German, and Norwegian
versions of Bronzeage. The experiment was carried out on an
Ubuntu Linux laptop with a 1.5 GHz Intel Centrino processor.
|| | GF | gfcc(hs) | gfcc++ |
| program size | 7249k | 803k | 113k
| grammar size | 336k | 119k | 119k
| read grammar | 1150ms | 510ms | 100ms
| generate 222 | 9500ms | 450ms | 800ms
| memory | 21M | 10M | 20M
To summarize:
- going from GF to gfcc is a major win in both code size and efficiency
- going from Haskell to C++ interpreter is not a win yet, because of a space
leak in the C++ version
==Some things to do==
Interpreter in Java.
Parsing via MCFG
- the FCFG format can possibly be simplified
- parser grammars should be saved in files to make interpreters easier
Hand-written parsers for GFCC grammars to reduce code size
(and efficiency?) of interpreters.
Binary format and/or file compression of GFCC output.
Syntax editor based on GFCC.
Rewriting of resource libraries in order to exploit the
word-suffix sharing better (depth-one tables, as in FM).

View File

@@ -1,180 +0,0 @@
GFCC Syntax
==Syntax of GFCC files==
The parser syntax is very simple, as defined in BNF:
```
Grm. Grammar ::= [RExp] ;
App. RExp ::= "(" CId [RExp] ")" ;
AId. RExp ::= CId ;
AInt. RExp ::= Integer ;
AStr. RExp ::= String ;
AFlt. RExp ::= Double ;
AMet. RExp ::= "?" ;
terminator RExp "" ;
token CId (('_' | letter) (letter | digit | '\'' | '_')*) ;
```
While a parser and a printer can be generated for many languages
from this grammar by using the BNF Converter, a parser is also
easy to write by hand using recursive descent.
==Syntax of well-formed GFCC code==
Here is a summary of well-formed syntax,
with a comment on the semantics of each construction.
```
Grammar ::=
("grammar" CId CId*) -- abstract syntax name and concrete syntax names
"(" "flags" Flag* ")" -- global and abstract flags
"(" "abstract" Abstract ")" -- abstract syntax
"(" "concrete" Concrete* ")" -- concrete syntaxes
Abstract ::=
"(" "fun" FunDef* ")" -- function definitions
"(" "cat" CatDef* ")" -- category definitions
Concrete ::=
"(" CId -- language name
"flags" Flag* -- concrete flags
"lin" LinDef* -- linearization rules
"oper" LinDef* -- operations (macros)
"lincat" LinDef* -- linearization type definitions
"lindef" LinDef* -- linearization default definitions
"printname" LinDef* -- printname definitions
"param" LinDef* -- lincats with labels and parameter value names
")"
Flag ::= "(" CId String ")" -- flag and value
FunDef ::= "(" CId Type Exp ")" -- function, type, and definition
CatDef ::= "(" CId Hypo* ")" -- category and context
LinDef ::= "(" CId Term ")" -- function and definition
Type ::=
"(" CId -- value category
"(" "H" Hypo* ")" -- argument context
"(" "X" Exp* ")" ")" -- arguments (of dependent value type)
Exp ::=
"(" CId -- function
"(" "B" CId* ")" -- bindings
"(" "X" Exp* ")" ")" -- arguments
| CId -- variable
| "?" -- metavariable
| "(" "Eq" Equation* ")" -- group of pattern equations
| Integer -- integer literal (non-negative)
| Float -- floating-point literal (non-negative)
| String -- string literal (in double quotes)
Hypo ::= "(" CId Type ")" -- variable and type
Equation ::= "(" "E" Exp Exp* ")" -- value and pattern list
Term ::=
"(" "R" Term* ")" -- array (record or table)
| "(" "S" Term* ")" -- concatenated sequence
| "(" "FV" Term* ")" -- free variant list
| "(" "P" Term Term ")" -- access to index (projection or selection)
| "(" "W" String Term ")" -- token prefix with suffix list
| "(" "A" Integer ")" -- pointer to subtree
| String -- token (in double quotes)
| Integer -- index in array
| CId -- macro constant
| "?" -- metavariable
```
==GFCC interpreter==
The first phase in interpreting GFCC is to parse a GFCC file and
build an internal abstract syntax representation, as specified
in the previous section.
With this representation, linearization can be performed by
a straightforward function from expressions (``Exp``) to terms
(``Term``). All expressions except groups of pattern equations
can be linearized.
Here is a reference Haskell implementation of linearization:
```
linExp :: GFCC -> CId -> Exp -> Term
linExp gfcc lang tree@(DTr _ at trees) = case at of
AC fun -> comp (map lin trees) $ look fun
AS s -> R [K (show s)] -- quoted
AI i -> R [K (show i)]
AF d -> R [K (show d)]
AM -> TM
where
lin = linExp gfcc lang
comp = compute gfcc lang
look = lookLin gfcc lang
```
TODO: bindings must be supported.
Terms resulting from linearization are evaluated in
call-by-value order, with two environments needed:
- the grammar (a concrete syntax) to give the global constants
- an array of terms to give the subtree linearizations
The Haskell implementation works as follows:
```
compute :: GFCC -> CId -> [Term] -> Term -> Term
compute gfcc lang args = comp where
comp trm = case trm of
P r p -> proj (comp r) (comp p)
W s t -> W s (comp t)
R ts -> R $ map comp ts
V i -> idx args (fromInteger i) -- already computed
F c -> comp $ look c -- not computed (if contains V)
FV ts -> FV $ Prelude.map comp ts
S ts -> S $ Prelude.filter (/= S []) $ Prelude.map comp ts
_ -> trm
look = lookOper gfcc lang
idx xs i = xs !! i
proj r p = case (r,p) of
(_, FV ts) -> FV $ Prelude.map (proj r) ts
(FV ts, _ ) -> FV $ Prelude.map (\t -> proj t p) ts
(W s t, _) -> kks (s ++ getString (proj t p))
_ -> comp $ getField r (getIndex p)
getString t = case t of
K (KS s) -> s
_ -> trace ("ERROR in grammar compiler: string from "++ show t) "ERR"
getIndex t = case t of
C i -> fromInteger i
RP p _ -> getIndex p
TM -> 0 -- default value for parameter
_ -> trace ("ERROR in grammar compiler: index from " ++ show t) 0
getField t i = case t of
R rs -> idx rs i
RP _ r -> getField r i
TM -> TM
_ -> trace ("ERROR in grammar compiler: field from " ++ show t) t
```
The result of linearization is usually a record, which is realized as
a string using the following algorithm.
```
realize :: Term -> String
realize trm = case trm of
R (t:_) -> realize t
S ss -> unwords $ map realize ss
K s -> s
W s t -> s ++ realize t
FV (t:_) -> realize t -- TODO: all variants
TM -> "?"
```
Notice that realization always picks the first field of a record.
If a linearization type has more than one field, the first field
does not necessarily contain the desired string.
Also notice that the order of record fields in GFCC is not necessarily
the same as in GF source.

View File

@@ -1,153 +0,0 @@
Procedure for making a GF release:
1. Make sure everything that should be in the release has been
checked in.
2. Go to the src/ dir.
$ cd src
3. Edit configure.ac to set the right version number
(the second argument to the AC_INIT macro).
4. Edit gf.spec to set the version and release numbers
(change %define version and %define release).
5. Commit configure.ac and gf.spec:
$ darcs record -m 'Updated version numbers.' configure.ac gf.spec
6. Run autoconf to generate configure with the right version number:
$ autoconf
7. Go back to the root of the tree.
$ cd ..
8. Tag the release. (X_X should be replaced by the version number, with
_ instead of ., e.g. 2_0)
$ darcs tag -m RELEASE-X_X
9. Push the changes that you made for the release to the main repo:
$ darcs push
10. Build a source package:
$ cd src
$ ./configure
$ make dist
11. (Only if releasing a new grammars distribution)
Build a grammar tarball:
$ cd src
$ ./configure && make grammar-dist
12. Build an x86/linux RPM (should be done on a Mandrake Linux box):
Setup for building RPMs (first time only):
    - Make sure that you have the directories necessary to build
      RPMs:
$ mkdir -p ~/rpm/{BUILD,RPMS/i586,RPMS/noarch,SOURCES,SRPMS,SPECS,tmp}
- Create ~/.rpmrc with the following contents:
buildarchtranslate: i386: i586
buildarchtranslate: i486: i586
buildarchtranslate: i586: i586
buildarchtranslate: i686: i586
- Create ~/.rpmmacros with the following contents:
%_topdir %(echo ${HOME}/rpm)
%_tmppath %{_topdir}/tmp
%packager Your Name <yourusername@cs.chalmers.se>
Build the RPM:
$ cd src
$ ./configure && make rpm
13. Build a generic binary x86/linux package (should be done on a Linux box,
e.g. banded.medic.chalmers.se):
$ cd src
$ ./configure --host=i386-pc-linux-gnu && make binary-dist
14. Build a generic binary sparc/solaris package (should be done
on a Solaris box, e.g. remote1.cs.chalmers.se):
$ cd src
$ ./configure --host=sparc-sun-solaris2 && gmake binary-dist
15. Build a Mac OS X package (should be done on a Mac OS X box,
e.g. csmisc99.cs.chalmers.se):
$ cd src
$ ./configure && make binary-dist
Note that to run GHC-compiled binaries on OS X, you need
a "Haskell Support Framework". This should be available
separately from the GF download page.
TODO: Use OS X PackageMaker to build a .pkg-file which can
be installed using the standard OS X Installer program.
16. Build a binary Cygwin package (should be done on a Windows
machine with Cygwin):
$ cd src
$ ./configure && make binary-dist
17. Build a Windows MSI package (FIXME: This doesn't work right,
pathnames with backslashes and spaces are not handled
correctly in Windows. We only release a binary tarball
for Cygwin right now.):
$ cd src
$ ./configure && make all windows-msi
18. Add new GF package release to SourceForge:
- https://sourceforge.net/projects/gf-tools
- Project page -> Admin -> File releases -> Add release (for the
GF package)
- New release name: X.X (just the version number, e.g. 2.2)
- Paste in release notes
- Upload files using anonymous FTP to upload.sourceforge.net
in the incoming directory.
- Add the files to the release and set the processor
and file type for each file (remember to press
Update/Refresh for each file):
* x86 rpm -> i386/.rpm
* source rpm -> Any/Source .rpm
* x86 binary tarball -> i386/.gz
* sparc binary tarball -> Sparc/.gz
* source package -> Any/Source .gz
19. Add new GF-editor release. Repeat the steps above, but
with GF-editor:
- Add files and set properties:
* editor rpm -> i386/.rpm (not really true, but I haven't
figured out how to make noarch rpms from the same spec as
arch-specific ones)
20. Mail to gf-tools-users@lists.sourceforge.net
21. Update website.
22. Party!

View File

@@ -1,967 +0,0 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
<HTML>
<HEAD>
<META NAME="generator" CONTENT="http://txt2tags.sf.net">
<TITLE>Resource grammar writing HOWTO</TITLE>
</HEAD><BODY BGCOLOR="white" TEXT="black">
<P ALIGN="center"><CENTER><H1>Resource grammar writing HOWTO</H1>
<FONT SIZE="4">
<I>Author: Aarne Ranta &lt;aarne (at) cs.chalmers.se&gt;</I><BR>
Last update: Mon Sep 22 14:28:01 2008
</FONT></CENTER>
<P></P>
<HR NOSHADE SIZE=1>
<P></P>
<UL>
<LI><A HREF="#toc1">The resource grammar structure</A>
<UL>
<LI><A HREF="#toc2">Library API modules</A>
<LI><A HREF="#toc3">Phrase category modules</A>
<LI><A HREF="#toc4">Infrastructure modules</A>
<LI><A HREF="#toc5">Lexical modules</A>
</UL>
<LI><A HREF="#toc6">Language-dependent syntax modules</A>
<UL>
<LI><A HREF="#toc7">The present-tense fragment</A>
</UL>
<LI><A HREF="#toc8">Phases of the work</A>
<UL>
<LI><A HREF="#toc9">Putting up a directory</A>
<LI><A HREF="#toc10">Direction of work</A>
<LI><A HREF="#toc11">The develop-test cycle</A>
<LI><A HREF="#toc12">Auxiliary modules</A>
<LI><A HREF="#toc13">Morphology and lexicon</A>
<LI><A HREF="#toc14">Lock fields</A>
<LI><A HREF="#toc15">Lexicon construction</A>
</UL>
<LI><A HREF="#toc16">Lexicon extension</A>
<UL>
<LI><A HREF="#toc17">The irregularity lexicon</A>
<LI><A HREF="#toc18">Lexicon extraction from a word list</A>
<LI><A HREF="#toc19">Lexicon extraction from raw text data</A>
<LI><A HREF="#toc20">Bootstrapping with smart paradigms</A>
</UL>
<LI><A HREF="#toc21">Extending the resource grammar API</A>
<LI><A HREF="#toc22">Using parametrized modules</A>
<UL>
<LI><A HREF="#toc23">Writing an instance of parametrized resource grammar implementation</A>
<LI><A HREF="#toc24">Parametrizing a resource grammar implementation</A>
</UL>
<LI><A HREF="#toc25">Character encoding and transliterations</A>
<LI><A HREF="#toc26">Coding conventions in GF</A>
<LI><A HREF="#toc27">Transliterations</A>
</UL>
<P></P>
<HR NOSHADE SIZE=1>
<P></P>
<P>
<B>History</B>
</P>
<P>
September 2008: updated for Version 1.5.
</P>
<P>
October 2007: updated for Version 1.2.
</P>
<P>
January 2006: first version.
</P>
<P>
The purpose of this document is to tell how to implement the GF
resource grammar API for a new language. We will <I>not</I> cover how
to use the resource grammar, nor how to change the API. But we
will give some hints how to extend the API.
</P>
<P>
A manual for using the resource grammar is found in
</P>
<P>
<A HREF="../lib/resource/doc/synopsis.html"><CODE>www.cs.chalmers.se/Cs/Research/Language-technology/GF/lib/resource/doc/synopsis.html</CODE></A>.
</P>
<P>
A tutorial on GF, also introducing the idea of resource grammars, is found in
</P>
<P>
<A HREF="./gf-tutorial.html"><CODE>www.cs.chalmers.se/Cs/Research/Language-technology/GF/doc/gf-tutorial.html</CODE></A>.
</P>
<P>
This document concerns the API v. 1.5, while the current stable release is 1.4.
You can find the code for the stable release in
</P>
<P>
<A HREF="../lib/resource"><CODE>www.cs.chalmers.se/Cs/Research/Language-technology/GF/lib/resource/</CODE></A>
</P>
<P>
and the next release in
</P>
<P>
<A HREF="../next-lib/src"><CODE>www.cs.chalmers.se/Cs/Research/Language-technology/GF/next-lib/src/</CODE></A>
</P>
<P>
It is recommended to build new grammars to match the next release.
</P>
<A NAME="toc1"></A>
<H2>The resource grammar structure</H2>
<P>
The library is divided into a bunch of modules, whose dependencies
are given in the following figure.
</P>
<P>
<IMG ALIGN="left" SRC="Syntax.png" BORDER="0" ALT="">
</P>
<P>
Modules of different kinds are distinguished as follows:
</P>
<UL>
<LI>solid contours: module seen by end users
<LI>dashed contours: internal module
<LI>ellipse: abstract/concrete pair of modules
<LI>rectangle: resource or instance
<LI>diamond: interface
</UL>
<P>
Put in another way:
</P>
<UL>
<LI>solid rectangles and diamonds: user-accessible library API
<LI>solid ellipses: user-accessible top-level grammar for parsing and linearization
<LI>dashed contours: not visible to users
</UL>
<P>
The dashed ellipses form the main parts of the implementation, on which the resource
grammar programmer has to work with. She also has to work on the <CODE>Paradigms</CODE>
module. The rest of the modules can be produced mechanically from corresponding
modules for other languages, by just changing the language codes appearing in
their module headers.
</P>
<P>
The module structure is rather flat: most modules are direct
parents of <CODE>Grammar</CODE>. The idea
is that the implementors can concentrate on one linguistic aspect at a time, or
also distribute the work among several authors. The module <CODE>Cat</CODE>
defines the "glue" that ties the aspects together - a type system
to which all the other modules conform, so that e.g. <CODE>NP</CODE> means
the same thing in those modules that use <CODE>NP</CODE>s and those that
construct them.
</P>
<A NAME="toc2"></A>
<H3>Library API modules</H3>
<P>
For the user of the library, these modules are the most important ones.
In a typical application, it is enough to open <CODE>Paradigms</CODE> and <CODE>Syntax</CODE>.
The module <CODE>Try</CODE> combines these two, making it possible to experiment
with combinations of syntactic and lexical constructors by using the
<CODE>cc</CODE> command in the GF shell. Here are short explanations of each API module:
</P>
<UL>
<LI><CODE>Try</CODE>: the whole resource library for a language (<CODE>Paradigms</CODE>, <CODE>Syntax</CODE>,
<CODE>Irreg</CODE>, and <CODE>Extra</CODE>);
produced mechanically as a collection of modules
<LI><CODE>Syntax</CODE>: language-independent categories, syntax functions, and structural words;
produced mechanically as a collection of modules
<LI><CODE>Constructors</CODE>: language-independent syntax functions and structural words;
produced mechanically via functor instantiation
<LI><CODE>Paradigms</CODE>: language-dependent morphological paradigms
</UL>
<A NAME="toc3"></A>
<H3>Phrase category modules</H3>
<P>
The immediate parents of <CODE>Grammar</CODE> will be called <B>phrase category modules</B>,
since each of them concentrates on a particular phrase category (nouns, verbs,
adjectives, sentences,...). A phrase category module tells
<I>how to construct phrases in that category</I>. You will find out that
all functions in any of these modules have the same value type (or maybe
one of a small number of different types). Thus we have
</P>
<UL>
<LI><CODE>Noun</CODE>: construction of nouns and noun phrases
<LI><CODE>Adjective</CODE>: construction of adjectival phrases
<LI><CODE>Verb</CODE>: construction of verb phrases
<LI><CODE>Adverb</CODE>: construction of adverbial phrases
<LI><CODE>Numeral</CODE>: construction of cardinal and ordinal numerals
<LI><CODE>Sentence</CODE>: construction of sentences and imperatives
<LI><CODE>Question</CODE>: construction of questions
<LI><CODE>Relative</CODE>: construction of relative clauses
<LI><CODE>Conjunction</CODE>: coordination of phrases
<LI><CODE>Phrase</CODE>: construction of the major units of text and speech
<LI><CODE>Text</CODE>: construction of texts as sequences of phrases
<LI><CODE>Idiom</CODE>: idiomatic expressions such as existentials
</UL>
<A NAME="toc4"></A>
<H3>Infrastructure modules</H3>
<P>
Expressions of each phrase category are constructed in the corresponding
phrase category module. But their <I>use</I> takes mostly place in other modules.
For instance, noun phrases, which are constructed in <CODE>Noun</CODE>, are
used as arguments of functions of almost all other phrase category modules.
How can we build all these modules independently of each other?
</P>
<P>
As usual in typeful programming, the <I>only</I> thing you need to know
about an object you use is its type. When writing a linearization rule
for a GF abstract syntax function, the only thing you need to know is
the linearization types of its value and argument categories. To achieve
the division of the resource grammar to several parallel phrase category modules,
what we need is an underlying definition of the linearization types. This
definition is given as the implementation of
</P>
<UL>
<LI><CODE>Cat</CODE>: syntactic categories of the resource grammar
</UL>
<P>
Any resource grammar implementation has first to agree on how to implement
<CODE>Cat</CODE>. Luckily enough, even this can be done incrementally: you
can skip the <CODE>lincat</CODE> definition of a category and use the default
<CODE>{s : Str}</CODE> until you need to change it to something else. In
English, for instance, many categories do have this linearization type.
</P>
<A NAME="toc5"></A>
<H3>Lexical modules</H3>
<P>
What is lexical and what is syntactic is not as clearcut in GF as in
some other grammar formalisms. Logically, lexical means atom, i.e. a
<CODE>fun</CODE> with no arguments. Linguistically, one may add to this
that the <CODE>lin</CODE> consists of only one token (or of a table whose values
are single tokens). Even in the restricted lexicon included in the resource
API, the latter rule is sometimes violated in some languages. For instance,
<CODE>Structural.both7and_DConj</CODE> is an atom, but its linearization is
two words e.g. <I>both - and</I>.
</P>
<P>
Another characterization of lexical is that lexical units can be added
almost <I>ad libitum</I>, and they cannot be defined in terms of already
given rules. The lexical modules of the resource API are thus more like
samples than complete lists. There are two such modules:
</P>
<UL>
<LI><CODE>Structural</CODE>: structural words (determiners, conjunctions,...)
<LI><CODE>Lexicon</CODE>: basic everyday content words (nouns, verbs,...)
</UL>
<P>
The module <CODE>Structural</CODE> aims for completeness, and is likely to
be extended in future releases of the resource. The module <CODE>Lexicon</CODE>
gives a "random" list of words, which enables testing the syntax.
It also provides a check list for morphology, since those words are likely to include
most morphological patterns of the language.
</P>
<P>
In the case of <CODE>Lexicon</CODE> it may come out clearer than anywhere else
in the API that it is impossible to give exact translation equivalents in
different languages on the level of a resource grammar. This is no problem,
since application grammars can use the resource in different ways for
different languages.
</P>
<A NAME="toc6"></A>
<H2>Language-dependent syntax modules</H2>
<P>
In addition to the common API, there is room for language-dependent extensions
of the resource. The top level of each language looks as follows (with German
as example):
</P>
<PRE>
abstract AllGerAbs = Lang, ExtraGerAbs, IrregGerAbs
</PRE>
<P>
where <CODE>ExtraGerAbs</CODE> is a collection of syntactic structures specific to German,
and <CODE>IrregGerAbs</CODE> is a dictionary of irregular words of German
(at the moment, just verbs). Each of these language-specific grammars has
the potential to grow into a full-scale grammar of the language. These grammar
can also be used as libraries, but the possibility of using functors is lost.
</P>
<P>
To give a better overview of language-specific structures,
modules like <CODE>ExtraGerAbs</CODE>
are built from a language-independent module <CODE>ExtraAbs</CODE>
by restricted inheritance:
</P>
<PRE>
abstract ExtraGerAbs = Extra [f,g,...]
</PRE>
<P>
Thus any category and function in <CODE>Extra</CODE> may be shared by a subset of all
languages. One can see this set-up as a matrix, which tells
what <CODE>Extra</CODE> structures
are implemented in what languages. For the common API in <CODE>Grammar</CODE>, the matrix
is filled with 1's (everything is implemented in every language).
</P>
<P>
In a minimal resource grammar implementation, the language-dependent
extensions are just empty modules, but it is good to provide them for
the sake of uniformity.
</P>
<A NAME="toc7"></A>
<H3>The present-tense fragment</H3>
<P>
Some lines in the resource library are suffixed with the comment
</P>
<PRE>
--# notpresent
</PRE>
<P>
which is used by a preprocessor to exclude those lines from
a reduced version of the full resource. This present-tense-only
version is useful for applications in most technical text, since
they reduce the grammar size and compilation time. It can also
be useful to exclude those lines in a first version of resource
implementation. To compile a grammar with present-tense-only, use
</P>
<PRE>
make Present
</PRE>
<P>
with <CODE>resource/Makefile</CODE>.
</P>
<A NAME="toc8"></A>
<H2>Phases of the work</H2>
<A NAME="toc9"></A>
<H3>Putting up a directory</H3>
<P>
Unless you are writing an instance of a parametrized implementation
(Romance or Scandinavian), which will be covered later, the
simplest way is to follow roughly the following procedure. Assume you
are building a grammar for the German language. Here are the first steps,
which we actually followed ourselves when building the German implementation
of resource v. 1.0 at Ubuntu linux. We have slightly modified them to
match resource v. 1.5 and GF v. 3.0.
</P>
<OL>
<LI>Create a sister directory for <CODE>GF/lib/resource/english</CODE>, named
<CODE>german</CODE>.
<PRE>
cd GF/lib/resource/
mkdir german
cd german
</PRE>
<P></P>
<LI>Check out the [ISO 639 3-letter language code
<A HREF="http://www.w3.org/WAI/ER/IG/ert/iso639.htm">http://www.w3.org/WAI/ER/IG/ert/iso639.htm</A>]
for German: both <CODE>Ger</CODE> and <CODE>Deu</CODE> are given, and we pick <CODE>Ger</CODE>.
(We use the 3-letter codes rather than the more common 2-letter codes,
since they will suffice for many more languages!)
<P></P>
<LI>Copy the <CODE>*Eng.gf</CODE> files from <CODE>english</CODE> to <CODE>german</CODE>,
  and rename them:
<PRE>
cp ../english/*Eng.gf .
rename 's/Eng/Ger/' *Eng.gf
</PRE>
If you don't have the <CODE>rename</CODE> command, you can use a bash script with <CODE>mv</CODE>.
</OL>
<OL>
<LI>Change the <CODE>Eng</CODE> module references to <CODE>Ger</CODE> references
in all files:
<PRE>
sed -i 's/English/German/g' *Ger.gf
sed -i 's/Eng/Ger/g' *Ger.gf
</PRE>
The first line prevents changing the word <CODE>English</CODE>, which appears
here and there in comments, to <CODE>Gerlish</CODE>. The <CODE>sed</CODE> command syntax
may vary depending on your operating system.
<P></P>
<LI>This may of course change unwanted occurrences of the
string <CODE>Eng</CODE> - verify this by
<PRE>
grep Ger *.gf
</PRE>
But you will have to make lots of manual changes in all files anyway!
<P></P>
<LI>Comment out the contents of these files:
<PRE>
sed -i 's/^/--/' *Ger.gf
</PRE>
This will give you a set of templates out of which the grammar
will grow as you uncomment and modify the files rule by rule.
<P></P>
<LI>In all <CODE>.gf</CODE> files, uncomment the module headers and brackets,
leaving the module bodies commented. Unfortunately, there is no
simple way to do this automatically (or to avoid commenting these
lines in the previous step) - but uncommenting the first
and the last lines will actually do the job for many of the files.
<P></P>
<LI>Uncomment the contents of the main grammar file:
<PRE>
sed -i 's/^--//' LangGer.gf
</PRE>
<P></P>
<LI>Now you can open the grammar <CODE>LangGer</CODE> in GF:
<PRE>
gf LangGer.gf
</PRE>
You will get lots of warnings on missing rules, but the grammar will compile.
<P></P>
<LI>At all the following steps you will now have a valid, but incomplete
GF grammar. The GF command
<PRE>
pg -missing
</PRE>
tells you what exactly is missing.
</OL>
<P>
Here is the module structure of <CODE>LangGer</CODE>. It has been simplified by leaving out
the majority of the phrase category modules. Each of them has the same dependencies
as <CODE>VerbGer</CODE>, whose complete dependencies are shown as an example.
</P>
<P>
<IMG ALIGN="middle" SRC="German.png" BORDER="0" ALT="">
</P>
<A NAME="toc10"></A>
<H3>Direction of work</H3>
<P>
The real work starts now. There are many ways to proceed, the most obvious ones being
</P>
<UL>
<LI>Top-down: start from the module <CODE>Phrase</CODE> and go down to <CODE>Sentence</CODE>, then
<CODE>Verb</CODE>, <CODE>Noun</CODE>, and in the end <CODE>Lexicon</CODE>. In this way, you are all the time
building complete phrases, and add them with more content as you proceed.
<B>This approach is not recommended</B>. It is impossible to test the rules if
you have no words to apply the constructions to.
<P></P>
<LI>Bottom-up: set as your first goal to implement <CODE>Lexicon</CODE>. To this end, you
need to write <CODE>ParadigmsGer</CODE>, which in turn needs parts of
<CODE>MorphoGer</CODE> and <CODE>ResGer</CODE>.
<B>This approach is not recommended</B>. You can get stuck to details of
morphology such as irregular words, and you don't have enough grasp about
the type system to decide what forms to cover in morphology.
</UL>
<P>
The practical working direction is thus a saw-like motion between the morphological
and top-level modules. Here is a possible course of the work that gives enough
test data and enough general view at any point:
</P>
<OL>
<LI>Define <CODE>Cat.N</CODE> and the required parameter types in <CODE>ResGer</CODE>. As we define
<PRE>
lincat N = {s : Number =&gt; Case =&gt; Str ; g : Gender} ;
</PRE>
we need the parameter types <CODE>Number</CODE>, <CODE>Case</CODE>, and <CODE>Gender</CODE>. The definition
of <CODE>Number</CODE> in <A HREF="../lib/resource/common/ParamX.gf"><CODE>common/ParamX</CODE></A>
works for German, so we
use it and just define <CODE>Case</CODE> and <CODE>Gender</CODE> in <CODE>ResGer</CODE>.
<P></P>
<LI>Define some cases of <CODE>mkN</CODE> in <CODE>ParadigmsGer</CODE>. In this way you can
already implement a huge amount of nouns correctly in <CODE>LexiconGer</CODE>. Actually
just adding the worst-case instance of <CODE>mkN</CODE> (the one taking the most
arguments) should suffice for every noun - but,
since it is tedious to use, you
might proceed to the next step before returning to morphology and defining the
real work horse, <CODE>mkN</CODE> taking two forms and a gender.
<P></P>
<LI>While doing this, you may want to test the resource independently. Do this by
starting the GF shell in the <CODE>resource</CODE> directory, by the commands
<PRE>
&gt; i -retain german/ParadigmsGer
&gt; cc -table mkN "Kirche"
</PRE>
<P></P>
<LI>Proceed to determiners and pronouns in
<CODE>NounGer</CODE> (<CODE>DetCN UsePron DetQuant NumSg DefArt IndefArt UseN</CODE>) and
<CODE>StructuralGer</CODE> (<CODE>i_Pron this_Quant</CODE>). You also need some categories and
parameter types. At this point, it is maybe not possible to find out the final
linearization types of <CODE>CN</CODE>, <CODE>NP</CODE>, <CODE>Det</CODE>, and <CODE>Quant</CODE>, but at least you should
be able to correctly inflect noun phrases such as <I>every airplane</I>:
<PRE>
&gt; i german/LangGer.gf
&gt; l -table DetCN every_Det (UseN airplane_N)
Nom: jeder Flugzeug
Acc: jeden Flugzeug
Dat: jedem Flugzeug
Gen: jedes Flugzeugs
</PRE>
<P></P>
<LI>Proceed to verbs: define <CODE>CatGer.V</CODE>, <CODE>ResGer.VForm</CODE>, and
<CODE>ParadigmsGer.mkV</CODE>. You may choose to exclude <CODE>notpresent</CODE>
cases at this point. But anyway, you will be able to inflect a good
number of verbs in <CODE>Lexicon</CODE>, such as
<CODE>live_V</CODE> (<CODE>mkV "leben"</CODE>).
<P></P>
<LI>Now you can soon form your first sentences: define <CODE>VP</CODE> and
<CODE>Cl</CODE> in <CODE>CatGer</CODE>, <CODE>VerbGer.UseV</CODE>, and <CODE>SentenceGer.PredVP</CODE>.
Even if you have excluded the tenses, you will be able to produce
<PRE>
&gt; i -preproc=./mkPresent german/LangGer.gf
&gt; l -table PredVP (UsePron i_Pron) (UseV live_V)
Pres Simul Pos Main: ich lebe
Pres Simul Pos Inv: lebe ich
Pres Simul Pos Sub: ich lebe
Pres Simul Neg Main: ich lebe nicht
Pres Simul Neg Inv: lebe ich nicht
Pres Simul Neg Sub: ich nicht lebe
</PRE>
You should also be able to parse:
<PRE>
&gt; p -cat=Cl "ich lebe"
PredVP (UsePron i_Pron) (UseV live_V)
</PRE>
<P></P>
<LI>Transitive verbs
(<CODE>CatGer.V2 CatGer.VPSlash ParadigmsGer.mkV2 VerbGer.ComplSlash VerbGer.SlashV2a</CODE>)
are a natural next step, so that you can
produce <CODE>ich liebe dich</CODE> ("I love you").
<P></P>
<LI>Adjectives (<CODE>CatGer.A ParadigmsGer.mkA NounGer.AdjCN AdjectiveGer.PositA</CODE>)
will force you to think about strong and weak declensions, so that you can
correctly inflect <I>mein neuer Wagen, dieser neue Wagen</I>
("my new car, this new car").
<P></P>
<LI>Once you have implemented the set
(<CODE>Noun.DetCN Noun.AdjCN Verb.UseV Verb.ComplSlash Verb.SlashV2a Sentence.PredVP</CODE>),
you have overcome most of difficulties. You know roughly what parameters
and dependences there are in your language, and you can now proceed very
much in the order you please.
</OL>
<A NAME="toc11"></A>
<H3>The develop-test cycle</H3>
<P>
The following develop-test cycle will
be applied most of the time, both in the first steps described above
and in later steps where you are more on your own.
</P>
<OL>
<LI>Select a phrase category module, e.g. <CODE>NounGer</CODE>, and uncomment some
linearization rules (for instance, <CODE>DetCN</CODE>, as above).
<P></P>
<LI>Write down some German examples of this rule, for instance translations
of "the dog", "the house", "the big house", etc. Write these in all their
different forms (two numbers and four cases).
<P></P>
<LI>Think about the categories involved (<CODE>CN, NP, N, Det</CODE>) and the
variations they have. Encode this in the lincats of <CODE>CatGer</CODE>.
You may have to define some new parameter types in <CODE>ResGer</CODE>.
<P></P>
<LI>To be able to test the construction,
define some words you need to instantiate it
in <CODE>LexiconGer</CODE>. You will also need some regular inflection patterns
in <CODE>ParadigmsGer</CODE>.
<P></P>
<LI>Test by parsing, linearization,
and random generation. In particular, linearization to a table should
be used so that you see all forms produced; the <CODE>treebank</CODE> option
preserves the tree
<PRE>
&gt; gr -cat=NP -number=20 | l -table -treebank
</PRE>
<P></P>
<LI>Save some tree-linearization pairs for later regression testing. You can save
a gold standard treebank and use the Unix <CODE>diff</CODE> command to compare later
linearizations produced from the same list of trees. If you save the trees
in a file <CODE>trees</CODE>, you can do as follows:
<PRE>
&gt; rf -file=trees -tree -lines | l -table -treebank | wf -file=treebank
</PRE>
<P></P>
<LI>A file with trees testing all resource functions is included in the resource,
entitled <CODE>resource/exx-resource.gft</CODE>. A treebank can be created from this by
the Unix command
<PRE>
% runghc Make.hs test langs=Ger
</PRE>
</OL>
<P>
You are likely to run this cycle a few times for each linearization rule
you implement, and some hundreds of times altogether. There are roughly
70 <CODE>cat</CODE>s and
600 <CODE>funs</CODE> in <CODE>Lang</CODE> at the moment (170 of the <CODE>funs</CODE> are outside the two
lexicon modules).
</P>
<A NAME="toc12"></A>
<H3>Auxiliary modules</H3>
<P>
These auxiliary <CODE>resource</CODE> modules will be written by you.
</P>
<UL>
<LI><CODE>ResGer</CODE>: parameter types and auxiliary operations
(a resource for the resource grammar!)
<LI><CODE>ParadigmsGer</CODE>: complete inflection engine and most important regular paradigms
<LI><CODE>MorphoGer</CODE>: auxiliaries for <CODE>ParadigmsGer</CODE> and <CODE>StructuralGer</CODE>. This need
not be separate from <CODE>ResGer</CODE>.
</UL>
<P>
These modules are language-independent and provided by the existing resource
package.
</P>
<UL>
<LI><CODE>ParamX</CODE>: parameter types used in many languages
<LI><CODE>CommonX</CODE>: implementation of language-uniform categories
such as <CODE>Text</CODE> and <CODE>Phr</CODE>, as well as of
the logical tense, anteriority, and polarity parameters
<LI><CODE>Coordination</CODE>: operations to deal with lists and coordination
<LI><CODE>Prelude</CODE>: general-purpose operations on strings, records,
truth values, etc.
<LI><CODE>Predef</CODE>: general-purpose operations with hard-coded definitions
</UL>
<P>
An important decision is what rules to implement in terms of operations in
<CODE>ResGer</CODE>. The <B>golden rule of functional programming</B> says:
</P>
<UL>
<LI><I>Whenever you find yourself programming by copy and paste, write a function instead!</I>.
</UL>
<P>
This rule suggests that an operation should be created if it is to be
used at least twice. At the same time, a sound principle of <B>vicinity</B> says:
</P>
<UL>
<LI><I>It should not require too much browsing to understand what a piece of code does.</I>
</UL>
<P>
From these two principles, we have derived the following practice:
</P>
<UL>
<LI>If an operation is needed <I>in two different modules</I>,
it should be created in as an <CODE>oper</CODE> in <CODE>ResGer</CODE>. An example is <CODE>mkClause</CODE>,
used in <CODE>Sentence</CODE>, <CODE>Question</CODE>, and <CODE>Relative</CODE>.
<LI>If an operation is needed <I>twice in the same module</I>, but never
outside, it should be created in the same module. Many examples are
found in <CODE>Numerals</CODE>.
<LI>If an operation is needed <I>twice in the same judgement</I>, but never
outside, it should be created by a <CODE>let</CODE> definition.
<LI>If an operation is only needed once, it should not be created as an <CODE>oper</CODE>,
but rather inlined. However, a <CODE>let</CODE> definition may well be in place just
to make the code readable.
Most functions in phrase category modules
are implemented in this way.
</UL>
<P>
This discipline is very different from the one followed in early
versions of the library (up to 0.9). We then valued the principle of
abstraction more than vicinity, creating layers of abstraction for
almost everything. This led in practice to the duplication of almost
all code on the <CODE>lin</CODE> and <CODE>oper</CODE> levels, and made the code
hard to understand and maintain.
</P>
<A NAME="toc13"></A>
<H3>Morphology and lexicon</H3>
<P>
The paradigms needed to implement
<CODE>LexiconGer</CODE> are defined in
<CODE>ParadigmsGer</CODE>.
This module provides high-level ways to define the linearization of
lexical items, of categories <CODE>N, A, V</CODE> and their complement-taking
variants.
</P>
<P>
For ease of use, the <CODE>Paradigms</CODE> modules follow a certain
naming convention. Thus they provide, for each lexical category, such as <CODE>N</CODE>,
overloaded functions, such as <CODE>mkN</CODE>, with the following cases:
</P>
<UL>
<LI>the worst-case construction of <CODE>N</CODE>. Its type signature
has the form
<PRE>
mkN : Str -&gt; ... -&gt; Str -&gt; P -&gt; ... -&gt; Q -&gt; N
</PRE>
with as many string and parameter arguments as can ever be needed to
construct an <CODE>N</CODE>.
<LI>the most regular cases, with just one string argument:
<PRE>
mkN : Str -&gt; N
</PRE>
<LI>A language-dependent (small) set of functions to handle mild irregularities
and common exceptions.
</UL>
<P>
For the complement-taking variants, such as <CODE>V2</CODE>, we provide
</P>
<UL>
<LI>a case that takes a <CODE>V</CODE> and all necessary arguments, such
as case and preposition:
<PRE>
mkV2 : V -&gt; Case -&gt; Str -&gt; V2 ;
</PRE>
<LI>a case that takes a <CODE>Str</CODE> and produces a transitive verb with the direct
object case:
<PRE>
mkV2 : Str -&gt; V2 ;
</PRE>
<LI>A language-dependent (small) set of functions to handle common special cases,
such as transitive verbs that are not regular:
<PRE>
mkV2 : V -&gt; V2 ;
</PRE>
</UL>
<P>
The golden rule for the design of paradigms is that
</P>
<UL>
<LI><I>The user of the library will only need function applications with constants and strings, never any records or tables.</I>
</UL>
<P>
The discipline of data abstraction moreover requires that the user of the resource
is not given access to parameter constructors, but only to constants that denote
them. This gives the resource grammarian the freedom to change the underlying
data representation if needed. It means that the <CODE>ParadigmsGer</CODE> module has
to define constants for those parameter types and constructors that
the application grammarian may need to use, e.g.
</P>
<PRE>
oper
Case : Type ;
nominative, accusative, genitive, dative : Case ;
</PRE>
<P>
These constants are defined in terms of parameter types and constructors
in <CODE>ResGer</CODE> and <CODE>MorphoGer</CODE>, which modules are not
visible to the application grammarian.
</P>
<A NAME="toc14"></A>
<H3>Lock fields</H3>
<P>
An important difference between <CODE>MorphoGer</CODE> and
<CODE>ParadigmsGer</CODE> is that the former uses "raw" record types
for word classes, whereas the latter uses category symbols defined in
<CODE>CatGer</CODE>. When these category symbols are used to denote
record types in a resource module, such as <CODE>ParadigmsGer</CODE>,
a <B>lock field</B> is added to the record, so that categories
with the same implementation are not confused with each other.
(This is inspired by the <CODE>newtype</CODE> discipline in Haskell.)
For instance, the lincats of adverbs and conjunctions are the same
in <CODE>CommonX</CODE> (and therefore in <CODE>CatGer</CODE>, which inherits it):
</P>
<PRE>
lincat Adv = {s : Str} ;
lincat Conj = {s : Str} ;
</PRE>
<P>
But when these category symbols are used to denote their linearization
types in a resource module, these definitions are translated to
</P>
<PRE>
oper Adv : Type = {s : Str ; lock_Adv : {}} ;
  oper Conj : Type = {s : Str ; lock_Conj : {}} ;
</PRE>
<P>
In this way, the user of a resource grammar cannot confuse adverbs with
conjunctions. In other words, the lock fields force the type checker
to function as grammaticality checker.
</P>
<P>
When the resource grammar is <CODE>open</CODE>ed in an application grammar, the
lock fields are never seen (except possibly in type error messages),
and the application grammarian should never write them herself. If she
has to do this, it is a sign that the resource grammar is incomplete, and
the proper way to proceed is to fix the resource grammar.
</P>
<P>
The resource grammarian has to provide the dummy lock field values
in her hidden definitions of constants in <CODE>Paradigms</CODE>. For instance,
</P>
<PRE>
mkAdv : Str -&gt; Adv ;
-- mkAdv s = {s = s ; lock_Adv = &lt;&gt;} ;
</PRE>
<P></P>
<A NAME="toc15"></A>
<H3>Lexicon construction</H3>
<P>
The lexicon belonging to <CODE>LangGer</CODE> consists of two modules:
</P>
<UL>
<LI><CODE>StructuralGer</CODE>, structural words, built by using both
<CODE>ParadigmsGer</CODE> and <CODE>MorphoGer</CODE>.
<LI><CODE>LexiconGer</CODE>, content words, built by using <CODE>ParadigmsGer</CODE> only.
</UL>
<P>
The reason why <CODE>MorphoGer</CODE> has to be used in <CODE>StructuralGer</CODE>
is that <CODE>ParadigmsGer</CODE> does not contain constructors for closed
word classes such as pronouns and determiners. The reason why we
recommend <CODE>ParadigmsGer</CODE> for building <CODE>LexiconGer</CODE> is that
the coverage of the paradigms gets thereby tested and that the
use of the paradigms in <CODE>LexiconGer</CODE> gives a good set of examples for
those who want to build new lexica.
</P>
<A NAME="toc16"></A>
<H2>Lexicon extension</H2>
<A NAME="toc17"></A>
<H3>The irregularity lexicon</H3>
<P>
It is useful in most languages to provide a separate module of irregular
verbs and other words which are difficult for a lexicographer
to handle. There are usually a limited number of such words - a
few hundred perhaps. Building such a lexicon separately also
makes it less important to cover <I>everything</I> by the
worst-case variants of the paradigms <CODE>mkV</CODE> etc.
</P>
<A NAME="toc18"></A>
<H3>Lexicon extraction from a word list</H3>
<P>
You can often find resources such as lists of
irregular verbs on the internet. For instance, the
Irregular German Verb page,
previously found at
<CODE>http://www.iee.et.tu-dresden.de/~wernerr/grammar/verben_dt.html</CODE>,
gives a list of verbs in the
traditional tabular format, which begins as follows:
</P>
<PRE>
backen (du bäckst, er bäckt) backte [buk] gebacken
befehlen (du befiehlst, er befiehlt; befiehl!) befahl (beföhle; befähle) befohlen
beginnen begann (begönne; begänne) begonnen
beißen biß gebissen
</PRE>
<P>
All you have to do is to write a suitable verb paradigm
</P>
<PRE>
irregV : (x1,_,_,_,_,x6 : Str) -&gt; V ;
</PRE>
<P>
and a Perl or Python or Haskell script that transforms
the table to
</P>
<PRE>
backen_V = irregV "backen" "bäckt" "back" "backte" "backte" "gebacken" ;
befehlen_V = irregV "befehlen" "befiehlt" "befiehl" "befahl" "beföhle" "befohlen" ;
</PRE>
<P></P>
<P>
When using ready-made word lists, you should think about
copyright issues. All resource grammar material should
be provided under GNU Lesser General Public License (LGPL).
</P>
<A NAME="toc19"></A>
<H3>Lexicon extraction from raw text data</H3>
<P>
This is a cheap technique to build a lexicon of thousands
of words, if text data is available in digital format.
See the <A HREF="http://www.cs.chalmers.se/~markus/extract/">Extract</A>
homepage for details.
</P>
<A NAME="toc20"></A>
<H3>Bootstrapping with smart paradigms</H3>
<P>
This is another cheap technique, where you need as input a list of words with
part-of-speech marking. You initialize the lexicon by using the one-argument
<CODE>mkN</CODE> etc paradigms, and add forms to those words that do not come out right.
This procedure is described in the paper
</P>
<P>
A. Ranta.
How predictable is Finnish morphology? An experiment on lexicon construction.
In J. Nivre, M. Dahllöf and B. Megyesi (eds),
<I>Resourceful Language Technology: Festschrift in Honor of Anna Sågvall Hein</I>,
University of Uppsala,
2008.
Available from the <A HREF="http://publications.uu.se/abstract.xsql?dbid=8933">series homepage</A>
</P>
<A NAME="toc21"></A>
<H2>Extending the resource grammar API</H2>
<P>
Sooner or later it will happen that the resource grammar API
does not suffice for all applications. A common reason is
that it does not include idiomatic expressions in a given language.
The solution then is in the first place to build language-specific
extension modules, like <CODE>ExtraGer</CODE>.
</P>
<A NAME="toc22"></A>
<H2>Using parametrized modules</H2>
<A NAME="toc23"></A>
<H3>Writing an instance of parametrized resource grammar implementation</H3>
<P>
Above we have looked at how a resource implementation is built by
the copy and paste method (from English to German), that is, formally
speaking, from scratch. A more elegant solution available for
families of languages such as Romance and Scandinavian is to
use parametrized modules. The advantages are
</P>
<UL>
<LI>theoretical: linguistic generalizations and insights
<LI>practical: maintainability improves with fewer components
</UL>
<P>
Here is a set of
<A HREF="http://www.cs.chalmers.se/~aarne/geocal2006.pdf">slides</A>
on the topic.
</P>
<A NAME="toc24"></A>
<H3>Parametrizing a resource grammar implementation</H3>
<P>
This is the most demanding form of resource grammar writing.
We do <I>not</I> recommend the method of parametrizing from the
beginning: it is easier to have one language first implemented
in the conventional way and then add another language of the
same family by parametrization. This means that the copy and
paste method is still used, but at this time the differences
are put into an <CODE>interface</CODE> module.
</P>
<A NAME="toc25"></A>
<H2>Character encoding and transliterations</H2>
<P>
This section is relevant for languages using a non-ASCII character set.
</P>
<A NAME="toc26"></A>
<H2>Coding conventions in GF</H2>
<P>
From version 3.0, GF follows a simple encoding convention:
</P>
<UL>
<LI>GF source files may follow any encoding, such as isolatin-1 or UTF-8;
the default is isolatin-1, and UTF8 must be indicated by the judgement
<PRE>
flags coding = utf8 ;
</PRE>
in each source module.
<LI>for internal processing, all characters are converted to 16-bit unicode,
as the first step of grammar compilation guided by the <CODE>coding</CODE> flag
<LI>as the last step of compilation, all characters are converted to UTF-8
<LI>thus, GF object files (<CODE>gfo</CODE>) and the Portable Grammar Format (<CODE>pgf</CODE>)
are in UTF-8
</UL>
<P>
Most current resource grammars use isolatin-1 in the source, but this does
not affect their use in parallel with grammars written in other encodings.
In fact, a grammar can be put up from modules using different codings.
</P>
<P>
<B>Warning</B>. While string literals may contain any characters, identifiers
must be isolatin-1 letters (or digits, underscores, or dashes). This has to
do with the restrictions of the lexer tool that is used.
</P>
<A NAME="toc27"></A>
<H2>Transliterations</H2>
<P>
While UTF-8 is well supported by most web browsers, its use in terminals and
text editors may cause disappointment. Many grammarians therefore prefer to
use ASCII transliterations. GF 3.0beta2 provides the following built-in
transliterations:
</P>
<UL>
<LI>Arabic
<LI>Devanagari (Hindi)
<LI>Thai
</UL>
<P>
New transliterations can be defined in the GF source file
<A HREF="../src/GF/Text/Transliterations.hs"><CODE>GF/Text/Transliterations.hs</CODE></A>.
This file also gives instructions on how new ones are added.
</P>
<!-- html code generated by txt2tags 2.4 (http://txt2tags.sf.net) -->
<!-- cmdline: txt2tags -\-toc Resource-HOWTO.txt -->
</BODY></HTML>

View File

@@ -1,827 +0,0 @@
Resource grammar writing HOWTO
Author: Aarne Ranta <aarne (at) cs.chalmers.se>
Last update: %%date(%c)
% NOTE: this is a txt2tags file.
% Create an html file from this file using:
% txt2tags --toc -thtml Resource-HOWTO.txt
%!target:html
**History**
September 2008: updated for Version 1.5.
October 2007: updated for Version 1.2.
January 2006: first version.
The purpose of this document is to tell how to implement the GF
resource grammar API for a new language. We will //not// cover how
to use the resource grammar, nor how to change the API. But we
will give some hints how to extend the API.
A manual for using the resource grammar is found in
[``www.cs.chalmers.se/Cs/Research/Language-technology/GF/lib/resource/doc/synopsis.html`` ../lib/resource/doc/synopsis.html].
A tutorial on GF, also introducing the idea of resource grammars, is found in
[``www.cs.chalmers.se/Cs/Research/Language-technology/GF/doc/gf-tutorial.html`` ./gf-tutorial.html].
This document concerns the API v. 1.5, while the current stable release is 1.4.
You can find the code for the stable release in
[``www.cs.chalmers.se/Cs/Research/Language-technology/GF/lib/resource/`` ../lib/resource]
and the next release in
[``www.cs.chalmers.se/Cs/Research/Language-technology/GF/next-lib/src/`` ../next-lib/src]
It is recommended to build new grammars to match the next release.
==The resource grammar structure==
The library is divided into a bunch of modules, whose dependencies
are given in the following figure.
[Syntax.png]
Modules of different kinds are distinguished as follows:
- solid contours: module seen by end users
- dashed contours: internal module
- ellipse: abstract/concrete pair of modules
- rectangle: resource or instance
- diamond: interface
Put in another way:
- solid rectangles and diamonds: user-accessible library API
- solid ellipses: user-accessible top-level grammar for parsing and linearization
- dashed contours: not visible to users
The dashed ellipses form the main parts of the implementation, on which the resource
grammar programmer has to work with. She also has to work on the ``Paradigms``
module. The rest of the modules can be produced mechanically from corresponding
modules for other languages, by just changing the language codes appearing in
their module headers.
The module structure is rather flat: most modules are direct
parents of ``Grammar``. The idea
is that the implementors can concentrate on one linguistic aspect at a time, or
also distribute the work among several authors. The module ``Cat``
defines the "glue" that ties the aspects together - a type system
to which all the other modules conform, so that e.g. ``NP`` means
the same thing in those modules that use ``NP``s and those that
constructs them.
===Library API modules===
For the user of the library, these modules are the most important ones.
In a typical application, it is enough to open ``Paradigms`` and ``Syntax``.
The module ``Try`` combines these two, making it possible to experiment
with combinations of syntactic and lexical constructors by using the
``cc`` command in the GF shell. Here are short explanations of each API module:
- ``Try``: the whole resource library for a language (``Paradigms``, ``Syntax``,
``Irreg``, and ``Extra``);
produced mechanically as a collection of modules
- ``Syntax``: language-independent categories, syntax functions, and structural words;
produced mechanically as a collection of modules
- ``Constructors``: language-independent syntax functions and structural words;
produced mechanically via functor instantiation
- ``Paradigms``: language-dependent morphological paradigms
===Phrase category modules===
The immediate parents of ``Grammar`` will be called **phrase category modules**,
since each of them concentrates on a particular phrase category (nouns, verbs,
adjectives, sentences,...). A phrase category module tells
//how to construct phrases in that category//. You will find out that
all functions in any of these modules have the same value type (or maybe
one of a small number of different types). Thus we have
- ``Noun``: construction of nouns and noun phrases
- ``Adjective``: construction of adjectival phrases
- ``Verb``: construction of verb phrases
- ``Adverb``: construction of adverbial phrases
- ``Numeral``: construction of cardinal and ordinal numerals
- ``Sentence``: construction of sentences and imperatives
- ``Question``: construction of questions
- ``Relative``: construction of relative clauses
- ``Conjunction``: coordination of phrases
- ``Phrase``: construction of the major units of text and speech
- ``Text``: construction of texts as sequences of phrases
- ``Idiom``: idiomatic expressions such as existentials
===Infrastructure modules===
Expressions of each phrase category are constructed in the corresponding
phrase category module. But their //use// takes mostly place in other modules.
For instance, noun phrases, which are constructed in ``Noun``, are
used as arguments of functions of almost all other phrase category modules.
How can we build all these modules independently of each other?
As usual in typeful programming, the //only// thing you need to know
about an object you use is its type. When writing a linearization rule
for a GF abstract syntax function, the only thing you need to know is
the linearization types of its value and argument categories. To achieve
the division of the resource grammar to several parallel phrase category modules,
what we need is an underlying definition of the linearization types. This
definition is given as the implementation of
- ``Cat``: syntactic categories of the resource grammar
Any resource grammar implementation has first to agree on how to implement
``Cat``. Luckily enough, even this can be done incrementally: you
can skip the ``lincat`` definition of a category and use the default
``{s : Str}`` until you need to change it to something else. In
English, for instance, many categories do have this linearization type.
===Lexical modules===
What is lexical and what is syntactic is not as clearcut in GF as in
some other grammar formalisms. Logically, lexical means atom, i.e. a
``fun`` with no arguments. Linguistically, one may add to this
that the ``lin`` consists of only one token (or of a table whose values
are single tokens). Even in the restricted lexicon included in the resource
API, the latter rule is sometimes violated in some languages. For instance,
``Structural.both7and_DConj`` is an atom, but its linearization is
two words e.g. //both - and//.
Another characterization of lexical is that lexical units can be added
almost //ad libitum//, and they cannot be defined in terms of already
given rules. The lexical modules of the resource API are thus more like
samples than complete lists. There are two such modules:
- ``Structural``: structural words (determiners, conjunctions,...)
- ``Lexicon``: basic everyday content words (nouns, verbs,...)
The module ``Structural`` aims for completeness, and is likely to
be extended in future releases of the resource. The module ``Lexicon``
gives a "random" list of words, which enables testing the syntax.
It also provides a check list for morphology, since those words are likely to include
most morphological patterns of the language.
In the case of ``Lexicon`` it may come out clearer than anywhere else
in the API that it is impossible to give exact translation equivalents in
different languages on the level of a resource grammar. This is no problem,
since application grammars can use the resource in different ways for
different languages.
==Language-dependent syntax modules==
In addition to the common API, there is room for language-dependent extensions
of the resource. The top level of each languages looks as follows (with German
as example):
```
abstract AllGerAbs = Lang, ExtraGerAbs, IrregGerAbs
```
where ``ExtraGerAbs`` is a collection of syntactic structures specific to German,
and ``IrregGerAbs`` is a dictionary of irregular words of German
(at the moment, just verbs). Each of these language-specific grammars has
the potential to grow into a full-scale grammar of the language. These grammar
can also be used as libraries, but the possibility of using functors is lost.
To give a better overview of language-specific structures,
modules like ``ExtraGerAbs``
are built from a language-independent module ``ExtraAbs``
by restricted inheritance:
```
abstract ExtraGerAbs = Extra [f,g,...]
```
Thus any category and function in ``Extra`` may be shared by a subset of all
languages. One can see this set-up as a matrix, which tells
what ``Extra`` structures
are implemented in what languages. For the common API in ``Grammar``, the matrix
is filled with 1's (everything is implemented in every language).
In a minimal resource grammar implementation, the language-dependent
extensions are just empty modules, but it is good to provide them for
the sake of uniformity.
===The present-tense fragment===
Some lines in the resource library are suffixed with the comment
```
--# notpresent
```
which is used by a preprocessor to exclude those lines from
a reduced version of the full resource. This present-tense-only
version is useful for applications in most technical text, since
they reduce the grammar size and compilation time. It can also
be useful to exclude those lines in a first version of resource
implementation. To compile a grammar with present-tense-only, use
```
make Present
```
with ``resource/Makefile``.
==Phases of the work==
===Putting up a directory===
Unless you are writing an instance of a parametrized implementation
(Romance or Scandinavian), which will be covered later, the
simplest way is to follow roughly the following procedure. Assume you
are building a grammar for the German language. Here are the first steps,
which we actually followed ourselves when building the German implementation
of resource v. 1.0 at Ubuntu linux. We have slightly modified them to
match resource v. 1.5 and GF v. 3.0.
+ Create a sister directory for ``GF/lib/resource/english``, named
``german``.
```
cd GF/lib/resource/
mkdir german
cd german
```
+ Check out the [ISO 639 3-letter language code
http://www.w3.org/WAI/ER/IG/ert/iso639.htm]
for German: both ``Ger`` and ``Deu`` are given, and we pick ``Ger``.
(We use the 3-letter codes rather than the more common 2-letter codes,
since they will suffice for many more languages!)
+ Copy the ``*Eng.gf`` files from ``english`` to ``german``,
and rename them:
```
cp ../english/*Eng.gf .
rename 's/Eng/Ger/' *Eng.gf
```
If you don't have the ``rename`` command, you can use a bash script with ``mv``.
+ Change the ``Eng`` module references to ``Ger`` references
in all files:
```
sed -i 's/English/German/g' *Ger.gf
sed -i 's/Eng/Ger/g' *Ger.gf
```
The first line prevents changing the word ``English``, which appears
here and there in comments, to ``Gerlish``. The ``sed`` command syntax
may vary depending on your operating system.
+ This may of course change unwanted occurrences of the
string ``Eng`` - verify this by
```
grep Ger *.gf
```
But you will have to make lots of manual changes in all files anyway!
+ Comment out the contents of these files:
```
sed -i 's/^/--/' *Ger.gf
```
This will give you a set of templates out of which the grammar
will grow as you uncomment and modify the files rule by rule.
+ In all ``.gf`` files, uncomment the module headers and brackets,
leaving the module bodies commented. Unfortunately, there is no
simple way to do this automatically (or to avoid commenting these
lines in the previous step) - but uncommenting the first
and the last lines will actually do the job for many of the files.
+ Uncomment the contents of the main grammar file:
```
sed -i 's/^--//' LangGer.gf
```
+ Now you can open the grammar ``LangGer`` in GF:
```
gf LangGer.gf
```
You will get lots of warnings on missing rules, but the grammar will compile.
+ At all the following steps you will now have a valid, but incomplete
GF grammar. The GF command
```
pg -missing
```
tells you what exactly is missing.
Here is the module structure of ``LangGer``. It has been simplified by leaving out
the majority of the phrase category modules. Each of them has the same dependencies
as ``VerbGer``, whose complete dependencies are shown as an example.
[German.png]
===Direction of work===
The real work starts now. There are many ways to proceed, the most obvious ones being
- Top-down: start from the module ``Phrase`` and go down to ``Sentence``, then
``Verb``, ``Noun``, and in the end ``Lexicon``. In this way, you are all the time
building complete phrases, and add them with more content as you proceed.
**This approach is not recommended**. It is impossible to test the rules if
you have no words to apply the constructions to.
- Bottom-up: set as your first goal to implement ``Lexicon``. To this end, you
need to write ``ParadigmsGer``, which in turn needs parts of
``MorphoGer`` and ``ResGer``.
**This approach is not recommended**. You can get stuck to details of
morphology such as irregular words, and you don't have enough grasp about
the type system to decide what forms to cover in morphology.
The practical working direction is thus a saw-like motion between the morphological
and top-level modules. Here is a possible course of the work that gives enough
test data and enough general view at any point:
+ Define ``Cat.N`` and the required parameter types in ``ResGer``. As we define
```
lincat N = {s : Number => Case => Str ; g : Gender} ;
```
we need the parameter types ``Number``, ``Case``, and ``Gender``. The definition
of ``Number`` in [``common/ParamX`` ../lib/resource/common/ParamX.gf]
works for German, so we
use it and just define ``Case`` and ``Gender`` in ``ResGer``.
+ Define some cases of ``mkN`` in ``ParadigmsGer``. In this way you can
already implement a huge amount of nouns correctly in ``LexiconGer``. Actually
just adding the worst-case instance of ``mkN`` (the one taking the most
arguments) should suffice for every noun - but,
since it is tedious to use, you
might proceed to the next step before returning to morphology and defining the
real work horse, ``mkN`` taking two forms and a gender.
+ While doing this, you may want to test the resource independently. Do this by
starting the GF shell in the ``resource`` directory, by the commands
```
> i -retain german/ParadigmsGer
> cc -table mkN "Kirche"
```
+ Proceed to determiners and pronouns in
``NounGer`` (``DetCN UsePron DetQuant NumSg DefArt IndefArt UseN``) and
``StructuralGer`` (``i_Pron this_Quant``). You also need some categories and
parameter types. At this point, it is maybe not possible to find out the final
linearization types of ``CN``, ``NP``, ``Det``, and ``Quant``, but at least you should
be able to correctly inflect noun phrases such as //every airplane//:
```
> i german/LangGer.gf
> l -table DetCN every_Det (UseN airplane_N)
Nom: jeder Flugzeug
Acc: jeden Flugzeug
Dat: jedem Flugzeug
Gen: jedes Flugzeugs
```
+ Proceed to verbs: define ``CatGer.V``, ``ResGer.VForm``, and
``ParadigmsGer.mkV``. You may choose to exclude ``notpresent``
cases at this point. But anyway, you will be able to inflect a good
number of verbs in ``Lexicon``, such as
``live_V`` (``mkV "leben"``).
+ Now you can soon form your first sentences: define ``VP`` and
``Cl`` in ``CatGer``, ``VerbGer.UseV``, and ``SentenceGer.PredVP``.
Even if you have excluded the tenses, you will be able to produce
```
> i -preproc=./mkPresent german/LangGer.gf
> l -table PredVP (UsePron i_Pron) (UseV live_V)
Pres Simul Pos Main: ich lebe
Pres Simul Pos Inv: lebe ich
Pres Simul Pos Sub: ich lebe
Pres Simul Neg Main: ich lebe nicht
Pres Simul Neg Inv: lebe ich nicht
Pres Simul Neg Sub: ich nicht lebe
```
You should also be able to parse:
```
> p -cat=Cl "ich lebe"
PredVP (UsePron i_Pron) (UseV live_V)
```
+ Transitive verbs
(``CatGer.V2 CatGer.VPSlash ParadigmsGer.mkV2 VerbGer.ComplSlash VerbGer.SlashV2a``)
are a natural next step, so that you can
produce ``ich liebe dich`` ("I love you").
+ Adjectives (``CatGer.A ParadigmsGer.mkA NounGer.AdjCN AdjectiveGer.PositA``)
will force you to think about strong and weak declensions, so that you can
correctly inflect //mein neuer Wagen, dieser neue Wagen//
("my new car, this new car").
+ Once you have implemented the set
(``Noun.DetCN Noun.AdjCN Verb.UseV Verb.ComplSlash Verb.SlashV2a Sentence.PredVP``),
you have overcome most of difficulties. You know roughly what parameters
and dependences there are in your language, and you can now proceed very
much in the order you please.
===The develop-test cycle===
The following develop-test cycle will
be applied most of the time, both in the first steps described above
and in later steps where you are more on your own.
+ Select a phrase category module, e.g. ``NounGer``, and uncomment some
linearization rules (for instance, ``DetCN``, as above).
+ Write down some German examples of this rule, for instance translations
of "the dog", "the house", "the big house", etc. Write these in all their
different forms (two numbers and four cases).
+ Think about the categories involved (``CN, NP, N, Det``) and the
variations they have. Encode this in the lincats of ``CatGer``.
You may have to define some new parameter types in ``ResGer``.
+ To be able to test the construction,
define some words you need to instantiate it
in ``LexiconGer``. You will also need some regular inflection patterns
in ``ParadigmsGer``.
+ Test by parsing, linearization,
and random generation. In particular, linearization to a table should
be used so that you see all forms produced; the ``treebank`` option
preserves the tree
```
> gr -cat=NP -number=20 | l -table -treebank
```
+ Save some tree-linearization pairs for later regression testing. You can save
a gold standard treebank and use the Unix ``diff`` command to compare later
linearizations produced from the same list of trees. If you save the trees
in a file ``trees``, you can do as follows:
```
> rf -file=trees -tree -lines | l -table -treebank | wf -file=treebank
```
+ A file with trees testing all resource functions is included in the resource,
entitled ``resource/exx-resource.gft``. A treebank can be created from this by
the Unix command
```
% runghc Make.hs test langs=Ger
```
You are likely to run this cycle a few times for each linearization rule
you implement, and some hundreds of times altogether. There are roughly
70 ``cat``s and
600 ``funs`` in ``Lang`` at the moment (170 of the ``funs`` are outside the two
lexicon modules).
===Auxiliary modules===
These auxiliary ``resource`` modules will be written by you.
- ``ResGer``: parameter types and auxiliary operations
(a resource for the resource grammar!)
- ``ParadigmsGer``: complete inflection engine and most important regular paradigms
- ``MorphoGer``: auxiliaries for ``ParadigmsGer`` and ``StructuralGer``. This need
not be separate from ``ResGer``.
These modules are language-independent and provided by the existing resource
package.
- ``ParamX``: parameter types used in many languages
- ``CommonX``: implementation of language-uniform categories
such as $Text$ and $Phr$, as well as of
the logical tense, anteriority, and polarity parameters
- ``Coordination``: operations to deal with lists and coordination
- ``Prelude``: general-purpose operations on strings, records,
truth values, etc.
- ``Predef``: general-purpose operations with hard-coded definitions
An important decision is what rules to implement in terms of operations in
``ResGer``. The **golden rule of functional programming** says:
- //Whenever you find yourself programming by copy and paste, write a function instead!//.
This rule suggests that an operation should be created if it is to be
used at least twice. At the same time, a sound principle of **vicinity** says:
- //It should not require too much browsing to understand what a piece of code does.//
From these two principles, we have derived the following practice:
- If an operation is needed //in two different modules//,
it should be created as an ``oper`` in ``ResGer``. An example is ``mkClause``,
used in ``Sentence``, ``Question``, and ``Relative``.
- If an operation is needed //twice in the same module//, but never
outside, it should be created in the same module. Many examples are
found in ``Numerals``.
- If an operation is needed //twice in the same judgement//, but never
outside, it should be created by a ``let`` definition.
- If an operation is only needed once, it should not be created as an ``oper``,
but rather inlined. However, a ``let`` definition may well be in place just
to make the code readable.
Most functions in phrase category modules
are implemented in this way.
This discipline is very different from the one followed in early
versions of the library (up to 0.9). We then valued the principle of
abstraction more than vicinity, creating layers of abstraction for
almost everything. This led in practice to the duplication of almost
all code on the ``lin`` and ``oper`` levels, and made the code
hard to understand and maintain.
===Morphology and lexicon===
The paradigms needed to implement
``LexiconGer`` are defined in
``ParadigmsGer``.
This module provides high-level ways to define the linearization of
lexical items, of categories ``N, A, V`` and their complement-taking
variants.
For ease of use, the ``Paradigms`` modules follow a certain
naming convention. Thus they provide, for each lexical category such as ``N``,
overloaded functions such as ``mkN``, with the following cases:
- the worst-case construction of ``N``. Its type signature
has the form
```
mkN : Str -> ... -> Str -> P -> ... -> Q -> N
```
with as many string and parameter arguments as can ever be needed to
construct an ``N``.
- the most regular cases, with just one string argument:
```
mkN : Str -> N
```
- A language-dependent (small) set of functions to handle mild irregularities
and common exceptions.
For the complement-taking variants, such as ``V2``, we provide
- a case that takes a ``V`` and all necessary arguments, such
as case and preposition:
```
mkV2 : V -> Case -> Str -> V2 ;
```
- a case that takes a ``Str`` and produces a transitive verb with the direct
object case:
```
mkV2 : Str -> V2 ;
```
- A language-dependent (small) set of functions to handle common special cases,
such as transitive verbs that are not regular:
```
mkV2 : V -> V2 ;
```
The golden rule for the design of paradigms is that
- //The user of the library will only need function applications with constants and strings, never any records or tables.//
The discipline of data abstraction moreover requires that the user of the resource
is not given access to parameter constructors, but only to constants that denote
them. This gives the resource grammarian the freedom to change the underlying
data representation if needed. It means that the ``ParadigmsGer`` module has
to define constants for those parameter types and constructors that
the application grammarian may need to use, e.g.
```
oper
Case : Type ;
nominative, accusative, genitive, dative : Case ;
```
These constants are defined in terms of parameter types and constructors
in ``ResGer`` and ``MorphoGer``, which modules are not
visible to the application grammarian.
===Lock fields===
An important difference between ``MorphoGer`` and
``ParadigmsGer`` is that the former uses "raw" record types
for word classes, whereas the latter uses category symbols defined in
``CatGer``. When these category symbols are used to denote
record types in a resource module, such as ``ParadigmsGer``,
a **lock field** is added to the record, so that categories
with the same implementation are not confused with each other.
(This is inspired by the ``newtype`` discipline in Haskell.)
For instance, the lincats of adverbs and conjunctions are the same
in ``CommonX`` (and therefore in ``CatGer``, which inherits it):
```
lincat Adv = {s : Str} ;
lincat Conj = {s : Str} ;
```
But when these category symbols are used to denote their linearization
types in resource module, these definitions are translated to
```
oper Adv : Type = {s : Str ; lock_Adv : {}} ;
oper Conj : Type = {s : Str ; lock_Conj : {}} ;
```
In this way, the user of a resource grammar cannot confuse adverbs with
conjunctions. In other words, the lock fields force the type checker
to function as grammaticality checker.
When the resource grammar is ``open``ed in an application grammar, the
lock fields are never seen (except possibly in type error messages),
and the application grammarian should never write them herself. If she
has to do this, it is a sign that the resource grammar is incomplete, and
the proper way to proceed is to fix the resource grammar.
The resource grammarian has to provide the dummy lock field values
in her hidden definitions of constants in ``Paradigms``. For instance,
```
mkAdv : Str -> Adv ;
-- mkAdv s = {s = s ; lock_Adv = <>} ;
```
===Lexicon construction===
The lexicon belonging to ``LangGer`` consists of two modules:
- ``StructuralGer``, structural words, built by using both
``ParadigmsGer`` and ``MorphoGer``.
- ``LexiconGer``, content words, built by using ``ParadigmsGer`` only.
The reason why ``MorphoGer`` has to be used in ``StructuralGer``
is that ``ParadigmsGer`` does not contain constructors for closed
word classes such as pronouns and determiners. The reason why we
recommend ``ParadigmsGer`` for building ``LexiconGer`` is that
the coverage of the paradigms gets thereby tested and that the
use of the paradigms in ``LexiconGer`` gives a good set of examples for
those who want to build new lexica.
==Lexicon extension==
===The irregularity lexicon===
It is useful in most languages to provide a separate module of irregular
verbs and other words which are difficult for a lexicographer
to handle. There are usually a limited number of such words - a
few hundred perhaps. Building such a lexicon separately also
makes it less important to cover //everything// by the
worst-case variants of the paradigms ``mkV`` etc.
===Lexicon extraction from a word list===
You can often find resources such as lists of
irregular verbs on the internet. For instance, the
Irregular German Verb page
previously found in
``http://www.iee.et.tu-dresden.de/~wernerr/grammar/verben_dt.html``
page gives a list of verbs in the
traditional tabular format, which begins as follows:
```
backen (du bäckst, er bäckt) backte [buk] gebacken
befehlen (du befiehlst, er befiehlt; befiehl!) befahl (beföhle; befähle) befohlen
beginnen begann (begönne; begänne) begonnen
beißen biß gebissen
```
All you have to do is to write a suitable verb paradigm
```
irregV : (x1,_,_,_,_,x6 : Str) -> V ;
```
and a Perl or Python or Haskell script that transforms
the table to
```
backen_V = irregV "backen" "bäckt" "back" "backte" "backte" "gebacken" ;
befehlen_V = irregV "befehlen" "befiehlt" "befiehl" "befahl" "beföhle" "befohlen" ;
```
When using ready-made word lists, you should think about
copyright issues. All resource grammar material should
be provided under the GNU Lesser General Public License (LGPL).
===Lexicon extraction from raw text data===
This is a cheap technique to build a lexicon of thousands
of words, if text data is available in digital format.
See the [Extract Homepage http://www.cs.chalmers.se/~markus/extract/]
homepage for details.
===Bootstrapping with smart paradigms===
This is another cheap technique, where you need as input a list of words with
part-of-speech marking. You initialize the lexicon by using the one-argument
``mkN`` etc paradigms, and add forms to those words that do not come out right.
This procedure is described in the paper
A. Ranta.
How predictable is Finnish morphology? An experiment on lexicon construction.
In J. Nivre, M. Dahllöf and B. Megyesi (eds),
//Resourceful Language Technology: Festschrift in Honor of Anna Sågvall Hein//,
University of Uppsala,
2008.
Available from the [series homepage http://publications.uu.se/abstract.xsql?dbid=8933]
==Extending the resource grammar API==
Sooner or later it will happen that the resource grammar API
does not suffice for all applications. A common reason is
that it does not include idiomatic expressions in a given language.
The solution then is in the first place to build language-specific
extension modules, like ``ExtraGer``.
==Using parametrized modules==
===Writing an instance of parametrized resource grammar implementation===
Above we have looked at how a resource implementation is built by
the copy and paste method (from English to German), that is, formally
speaking, from scratch. A more elegant solution available for
families of languages such as Romance and Scandinavian is to
use parametrized modules. The advantages are
- theoretical: linguistic generalizations and insights
- practical: maintainability improves with fewer components
Here is a set of
[slides http://www.cs.chalmers.se/~aarne/geocal2006.pdf]
on the topic.
===Parametrizing a resource grammar implementation===
This is the most demanding form of resource grammar writing.
We do //not// recommend the method of parametrizing from the
beginning: it is easier to have one language first implemented
in the conventional way and then add another language of the
same family by parametrization. This means that the copy and
paste method is still used, but this time the differences
are put into an ``interface`` module.
==Character encoding and transliterations==
This section is relevant for languages using a non-ASCII character set.
==Coding conventions in GF==
From version 3.0, GF follows a simple encoding convention:
- GF source files may follow any encoding, such as isolatin-1 or UTF-8;
the default is isolatin-1, and UTF-8 must be indicated by the judgement
```
flags coding = utf8 ;
```
in each source module.
- for internal processing, all characters are converted to 16-bit unicode,
as the first step of grammar compilation guided by the ``coding`` flag
- as the last step of compilation, all characters are converted to UTF-8
- thus, GF object files (``gfo``) and the Portable Grammar Format (``pgf``)
are in UTF-8
Most current resource grammars use isolatin-1 in the source, but this does
not affect their use in parallel with grammars written in other encodings.
In fact, a grammar can be put up from modules using different codings.
**Warning**. While string literals may contain any characters, identifiers
must be isolatin-1 letters (or digits, underscores, or dashes). This has to
do with the restrictions of the lexer tool that is used.
==Transliterations==
While UTF-8 is well supported by most web browsers, its use in terminals and
text editors may cause disappointment. Many grammarians therefore prefer to
use ASCII transliterations. GF 3.0beta2 provides the following built-in
transliterations:
- Arabic
- Devanagari (Hindi)
- Thai
New transliterations can be defined in the GF source file
[``GF/Text/Transliterations.hs`` ../src/GF/Text/Transliterations.hs].
This file also gives instructions on how new ones are added.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 102 KiB

1497
deprecated/config.guess vendored

File diff suppressed because it is too large Load Diff

View File

@@ -1,37 +0,0 @@
# GF configuration file. configure will produce config.mk from this file
# @configure_input@
PACKAGE_VERSION = @PACKAGE_VERSION@
prefix = @prefix@
exec_prefix = @exec_prefix@
bindir = @bindir@
libdir = @libdir@
datadir = @datadir@
host = @host@
build = @build@
GHCFLAGS = @GHCFLAGS@
CPPFLAGS = @CPPFLAGS@
LDFLAGS = @LDFLAGS@
EXEEXT = @EXEEXT@
INSTALL = @INSTALL@
TAR = @TAR@
GHC = "@GHC@"
GHCI = "@GHCI@"
READLINE = @READLINE@
INTERRUPT = @INTERRUPT@
ATK = @ATK@
ENABLE_JAVA = @ENABLE_JAVA@
JAVAC = "@JAVAC@"
JAR = "@JAR@"

1608
deprecated/config.sub vendored

File diff suppressed because it is too large Load Diff

View File

@@ -1,229 +0,0 @@
dnl Run autoconf to generate configure from this file
dnl
dnl Configure source for GF (Grammatical Framework).
dnl Probes for the Haskell compiler (GHC), optional readline support,
dnl SIGINT command interruption, ATK speech recognition, the Java
dnl toolchain, and tar/install, then substitutes the results into the
dnl output files listed in AC_CONFIG_FILES.
AC_INIT([GF],[3.0-beta3],[aarne@cs.chalmers.se],[GF])
AC_PREREQ(2.53)
AC_REVISION($Revision: 1.26 $)
dnl Files generated from their templates by config.status.
AC_CONFIG_FILES([config.mk gfc])
dnl Determine $host_os etc.; needed for the Cygwin special cases below.
AC_CANONICAL_HOST
dnl ***********************************************
dnl Executable suffix
dnl ***********************************************
dnl Cygwin executables need the .exe suffix; all other hosts use none.
AC_MSG_CHECKING([executable suffix])
case $host_os in
cygwin)
EXEEXT='.exe';;
*)
EXEEXT='';;
esac
AC_MSG_RESULT(['$EXEEXT'])
AC_SUBST(EXEEXT)
dnl ***********************************************
dnl GHC
dnl ***********************************************
dnl --with-ghc=<cmd> points at a specific Haskell compiler; by default
dnl 'ghc' is looked up on PATH.
AC_ARG_WITH(ghc,
AC_HELP_STRING([--with-ghc=<ghc command>],
[Use a different command instead of
'ghc' for the Haskell compiler.]),
[AC_CHECK_FILE("$withval",GHC="$withval",[AC_PATH_PROG(GHC,"$withval")])],
[AC_PATH_PROG(GHC,ghc)])
dnl NOTE(review): ghci is assumed to live next to the ghc binary;
dnl this is not verified.
GHCI=$(dirname $GHC)/ghci
GHC_VERSION=`$GHC --version | sed -e 's/.*version //'`
AC_MSG_CHECKING([GHC version])
AC_MSG_RESULT($GHC_VERSION)
AC_SUBST(GHC)
AC_SUBST(GHCI)
dnl ***********************************************
dnl readline
dnl ***********************************************
dnl Readline is on by default, except on Cygwin where it is known to
dnl break piping; --with-readline overrides either default.
AC_ARG_WITH(readline,
AC_HELP_STRING([--with-readline=<readline alternative>],
[Select which readline implementation to use.
Available alternatives are: 'readline' (GNU readline),
'no' (don't use readline)
(default = readline)]),
[if test "$withval" = "yes"; then
READLINE="readline"
else
READLINE="$withval"
fi],
[if test "$host_os" = "cygwin"; then
AC_MSG_WARN([There are problems with readline for Windows,
for example, pipe characters do not work.
Disabling readline support.
Use --with-readline to override.])
READLINE="no"
else
READLINE="readline"
fi])
dnl Only 'readline' and 'no' are accepted values.
case $READLINE in
readline)
;;
no)
;;
*)
AC_MSG_ERROR([Bad value for --with-readline: $READLINE])
;;
esac
AC_SUBST(READLINE)
dnl ***********************************************
dnl command interruption
dnl ***********************************************
dnl Ctrl-C interruption of running commands; disabled by default on
dnl Cygwin for lack of signal handler support.
AC_ARG_WITH(interrupt,
AC_HELP_STRING([--with-interrupt=<allow command interruption>],
[Choose whether to enable interruption of commands
with SIGINT (Ctrl-C)
Available alternatives are: 'yes', 'no'
(default = yes)]),
[INTERRUPT="$withval"],
[if test "$host_os" = "cygwin"; then
AC_MSG_WARN([Command interruption does not work under
Cygwin, because of missing signal handler support.
Disabling command interruption support.
Use --with-interrupt to override.])
INTERRUPT="no"
else
INTERRUPT="yes"
fi])
case $INTERRUPT in
yes)
;;
no)
;;
*)
AC_MSG_ERROR([Bad value for --with-interrupt: $INTERRUPT])
;;
esac
AC_SUBST(INTERRUPT)
dnl ***********************************************
dnl ATK speech recognition
dnl ***********************************************
dnl Off by default; when requested, the GHC 'atkrec' package must be
dnl registered with ghc-pkg, otherwise ATK support is disabled again.
AC_ARG_WITH(atk,
AC_HELP_STRING([--with-atk=<use ATK speech recognition>],
[Choose whether to compile in support for speech
recognition using ATK. Requires ATK and libatkrec.
Available alternatives are: 'yes', 'no'
(default = no)]),
[ATK="$withval"],
[ATK="no"])
case $ATK in
yes)
AC_MSG_CHECKING([for atkrec package])
ATKREC_VERSION=`ghc-pkg field atkrec version`
if test "$ATKREC_VERSION" = ""; then
AC_MSG_RESULT(['not found'])
AC_MSG_WARN([Disabling ATK support.])
ATK="no"
else
AC_MSG_RESULT([$ATKREC_VERSION])
fi
;;
no)
;;
*)
AC_MSG_ERROR([Bad value for --with-atk: $ATK])
;;
esac
AC_SUBST(ATK)
dnl ***********************************************
dnl java stuff
dnl ***********************************************
dnl Java components are built by default; javac, java, and jar can each
dnl be overridden. If any of the three is missing or not executable,
dnl Java builds are disabled with a warning rather than an error.
AC_ARG_ENABLE(java,
AC_HELP_STRING([--enable-java],
[Build Java components. (default = yes)]),
[ENABLE_JAVA="$enableval"],
[ENABLE_JAVA=yes]
)
if test "$ENABLE_JAVA" = "yes"; then
AC_ARG_WITH(javac,
AC_HELP_STRING([--with-javac=<javac command>],
[Use a different command instead of
'javac' for the Java compiler.]),
[AC_CHECK_FILE("$withval",JAVAC="$withval",[AC_PATH_PROG(JAVAC,"$withval")])],
[AC_PATH_PROG(JAVAC,javac)])
AC_SUBST(JAVAC)
AC_ARG_WITH(java,
AC_HELP_STRING([--with-java=<java command>],
[Use a different command instead of
'java' for the Java Virtual Machine.]),
[AC_CHECK_FILE("$withval",JAVA="$withval",[AC_PATH_PROG(JAVA,"$withval")])],
[AC_PATH_PROG(JAVA,java)])
AC_SUBST(JAVA)
AC_ARG_WITH(jar,
AC_HELP_STRING([--with-jar=<jar command>],
[Use a different command instead of
'jar' for the Java archive tool.]),
[AC_CHECK_FILE("$withval",JAR="$withval",[AC_PATH_PROG(JAR,"$withval")])],
[AC_PATH_PROG(JAR,jar)])
AC_SUBST(JAR)
dnl Fall back to no Java if any required tool is absent or not executable.
if test "$JAVAC" = "" || test ! -x "$JAVAC" \
|| test "$JAVA" = "" || test ! -x "$JAVA" \
|| test "$JAR" = "" || test ! -x "$JAR"; then
AC_MSG_WARN([Not building Java components.])
ENABLE_JAVA=no
fi
fi
AC_SUBST(ENABLE_JAVA)
dnl ***********************************************
dnl TAR
dnl ***********************************************
dnl Prefer GNU tar ('gtar') where it exists under that name.
AC_CHECK_PROGS(TAR, gtar tar)
dnl ***********************************************
dnl Other programs
dnl ***********************************************
AC_PROG_INSTALL
dnl ***********************************************
dnl Program flags
dnl ***********************************************
dnl These are only substituted, never set here; they come from the
dnl environment of the configure invocation.
AC_SUBST(GHCFLAGS)
AC_SUBST(CPPFLAGS)
AC_SUBST(LDFLAGS)
dnl ***********************************************
dnl Output
dnl ***********************************************
AC_OUTPUT

View File

@@ -1,259 +0,0 @@
<html>
<HEAD><META http-equiv=Content-Type content="text/html; charset=utf-8"></HEAD>
<body>
af_tunni : lámma kún síddi? boqól afartón i ków
<p>
albanian : dy mijë tre qind e dyzet e një
<p>
amharic : ሁለት ሺህ ሦስት መቶ ኣርባ ኣንድ
<p>
arabic_classical : الفان و ثلاث مائة و واحد و أربعون
<p>
arabic_modern : ﺍﻟﻔﻴﻦ ﻭ ﺛﻼﺛﻤﺎﺋﺔ ﻭ ﻭﺍﺣﺪ ﻭ ﺃﺭﺑﻌﻴﻦ
<p>
basque : bi mila ta hirurehun berrogei ta bat
<p>
bearlake_slave : nákee lamíl tai lak'o, óno, di,i, honéno, ?ó, l-ée
<p>
bulgarian : две жиляди триста четирисет и едно
<p>
catalan : dos mil tres-cents quaranta - u
<p>
chinese : 贰 仟 零 叁 佰 肆 拾 壹
<p>
croatian : dva hiljade tri stotine četrdeset i jedan
<p>
czech : dva tisíce tr^i sta čtyr^icet jeden
<p>
dagur : hoire miange guarebe jau duci neke
<p>
danish : to tusind og tre hundrede og en og fyrre
<p>
decimal : 2341
<p>
dutch : twee duizend drie honderd een en veertig
<p>
english : two thousand three hundred and forty - one
<p>
finnish : kaksi tuhatta kolme sataa neljä kymmentä yksi
<p>
french : deux mille trois cent quarante et un
<p>
french_swiss : deux mille trois cent quarante et un
<p>
fulfulde : ujine d.id.i temed.d.e tati e chappand.e nai e go'o
<p>
geez : ዕሽራ ወ ሠላስቱ ምእት አርብዓ ወ አሐዱ
<p>
german : zwei tausend drei hundert ein und vierzig
<p>
greek_classical : δισχίλιοι τριακόσιοι τετταράκοντα εἵς
<p>
greek_modern : δύο χιλιάδες τριακόσια σαράντα ένα
<p>
guahibo : aniha sunu akueya sia yana bae kae
<p>
guarani : moko~i ma mpohapy sa~ irundy kua~ petei~
<p>
hebrew_biblical : אלפים ו שלש מאות ו ארבעים ו אחד
<p>
hindi : दो हज़ार तीन सौ एक्तालीस
<p>
hungarian : két ezer három száz negyven egy
<p>
icelandic : tvö Þúsund Þrjú hundrað fjörutíu og einn
<p>
irish : dhá mhíle trí chead dhá fhichead a haon
<p>
italian : due mila tre cento quaranta uno
<p>
japanese : にせん さんびゃく よんぢゅう いち
<p>
kabardian : m&yn&yt' s'a&ys' p'L-'&s'ra z&ra
<p>
kambera : dua riu tailu ngahu patu kambulu hau
<p>
kawaiisu : N
<p>
khmer : bīra bā'na pī raya sē sipa mwya
<p>
khowar : joo hazâr troi shọr oché joo bîsher î
<p>
kodagu : i:ra:yrat mu:nu:yt.a na:padï
<p>
kolyma_yukaghir : N
<p>
kulung : ni habau su chhum lik i
<p>
kwami : dùbúk póllów dálmágí kúnún kán kúu pòD^òw kán múndí
<p>
kwaza : N
<p>
lalo : `n. t'w sa há i tjhí tjh`&
<p>
lamani : di hajaar do se caaLise par ek
<p>
latvian : divtu^kstoš trīssimt četrdesmit viens
<p>
lithuanian : dù tú:kstanc^iu, try:s s^imtai~ ke:turiasdes^imt víenas
<p>
lotuxo : tausand ârrexai ikO EssIxa xunixoi ikO atOmwana aNwan x' âbotye
<p>
maale : lam?ó $íya haitsó s'ééta ?oydí-támmi pétte
<p>
malay : dua ribu tiga ratus empat puluh satu
<p>
maltese : elfejn tliet mija u wieh-ed u erbgh-in
<p>
mapuche : epu warangka külá pataka meli mari kiñe
<p>
margi : dúbú s`&d>àN ghàrú mák`&r agá fód>ú kùmì gà s'&r pátlú*
<p>
maybrat : N
<p>
miya : d'&bu ts`&r '`&náa d>àriy kìdi '`&náa díb>i f`&d>& bèh&n wut'&
<p>
mongolian : qoyar mingGan Gurban ĵa'un döčin nigän
<p>
nenets : side juonar n-ahar jur t-êt ju' ~ob
<p>
norwegian_book : to tusen og tre hundre og førti et
<p>
old_church_slavonic : дъвѣ тысѭшти триѥ съта четыре десѧте и ѥдинъ
<p>
oromo : kuma lama fi dhibba sadii fi afurtamii tokko
<p>
pashto : دوه زره دري سوه او يو څلوۍښت
<p>
polish : dwa tysiace trzysta czterdziesci jeden
<p>
portuguese : dois mil trezentos quarenta e um
<p>
quechua : iskay warank'a kinsa pachak tawa chunka jukniyuq
<p>
romanian : două mii trei sute patruzeci şi unu
<p>
russian : две тысячи триста сорок один
<p>
sango : ngbangbu bale óse na ndó ní ngbangbu otá na ndó ní bale osió na ndó ní ÓkO
<p>
sanskrit : त्रि शतान्य एकचत्वारिंशच च द्वे सहस्रे
<p>
slovak : dva tisic tri sto styridsat jedna
<p>
sorani : دۇ ههزار سىسهد ځل و يهك
<p>
spanish : dos mil trescientos cuarenta y uno
<p>
stieng : baar ban pê riêng puôn jo't muôi
<p>
swahili : elfu mbili mia tatu arobaini na moja
<p>
swedish : två tusen tre hundra fyrtio ett
<p>
tamil : இரணௌடௌ ஆயாரதௌதீ மீனௌ ந஽ரீ ந஽ரௌ பதௌ ஓனௌரீ
<p>
tampere : kaks tuhatta kolme sataa nel kyt yks
<p>
tibetan : t̆ong ṭ'a' n̆yī d́ang sumğya d́ang z̆hyib chu źhye chi'
<p>
totonac : maa t~u3 mil lii ~a tuhun pus^um tun
<p>
tuda_daza : dubu cu sao kidra ago.zo. sao mOrta tozo sao tro
<p>
tukang_besi : dua riwu tolu hatu hato hulu sa'asa
<p>
turkish : iki bin üç yüz kırk bir
<p>
votic : kahsi tuhatta keVmsata: nelläts^ümmet ühsi
<p>
welsh : dau fil tri chan un a deugain
<p>
yasin_burushaski : altó hazár iskí tha altó-áltar hek
<p>
zaiwa : i55 hing55 sum11 syo31 mi11 cue31 ra11
</body>
</html>

Binary file not shown.

View File

@@ -1,569 +0,0 @@
\batchmode
%This Latex file is machine-generated by the BNF-converter
\documentclass[a4paper,11pt]{article}
\author{BNF-converter}
\title{The Language GF}
\setlength{\parindent}{0mm}
\setlength{\parskip}{1mm}
\begin{document}
\maketitle
\newcommand{\emptyP}{\mbox{$\epsilon$}}
\newcommand{\terminal}[1]{\mbox{{\texttt {#1}}}}
\newcommand{\nonterminal}[1]{\mbox{$\langle \mbox{{\sl #1 }} \! \rangle$}}
\newcommand{\arrow}{\mbox{::=}}
\newcommand{\delimit}{\mbox{$|$}}
\newcommand{\reserved}[1]{\mbox{{\texttt {#1}}}}
\newcommand{\literal}[1]{\mbox{{\texttt {#1}}}}
\newcommand{\symb}[1]{\mbox{{\texttt {#1}}}}
This document was automatically generated by the {\em BNF-Converter}. It was generated together with the lexer, the parser, and the abstract syntax module, which guarantees that the document matches with the implementation of the language (provided no hand-hacking has taken place).
\section*{The lexical structure of GF}
\subsection*{Identifiers}
Identifiers \nonterminal{Ident} are unquoted strings beginning with a letter,
followed by any combination of letters, digits, and the characters {\tt \_ '},
reserved words excluded.
\subsection*{Literals}
Integer literals \nonterminal{Int}\ are nonempty sequences of digits.
String literals \nonterminal{String}\ have the form
\terminal{"}$x$\terminal{"}, where $x$ is any sequence of any characters
except \terminal{"}\ unless preceded by \verb6\6.
LString literals are recognized by the regular expression
\(\mbox{`''} ({\nonterminal{anychar}} - \mbox{`''})* \mbox{`''}\)
\subsection*{Reserved words and symbols}
The set of reserved words is the set of terminals appearing in the grammar. Those reserved words that consist of non-letter characters are called symbols, and they are treated in a different way from those that are similar to identifiers. The lexer follows rules familiar from languages like Haskell, C, and Java, including longest match and spacing conventions.
The reserved words used in GF are the following: \\
\begin{tabular}{lll}
{\reserved{Lin}} &{\reserved{PType}} &{\reserved{Str}} \\
{\reserved{Strs}} &{\reserved{Tok}} &{\reserved{Type}} \\
{\reserved{abstract}} &{\reserved{case}} &{\reserved{cat}} \\
{\reserved{concrete}} &{\reserved{data}} &{\reserved{def}} \\
{\reserved{flags}} &{\reserved{fn}} &{\reserved{fun}} \\
{\reserved{grammar}} &{\reserved{in}} &{\reserved{include}} \\
{\reserved{incomplete}} &{\reserved{instance}} &{\reserved{interface}} \\
{\reserved{let}} &{\reserved{lin}} &{\reserved{lincat}} \\
{\reserved{lindef}} &{\reserved{lintype}} &{\reserved{of}} \\
{\reserved{open}} &{\reserved{oper}} &{\reserved{out}} \\
{\reserved{package}} &{\reserved{param}} &{\reserved{pattern}} \\
{\reserved{pre}} &{\reserved{printname}} &{\reserved{resource}} \\
{\reserved{reuse}} &{\reserved{strs}} &{\reserved{table}} \\
{\reserved{tokenizer}} &{\reserved{transfer}} &{\reserved{union}} \\
{\reserved{var}} &{\reserved{variants}} &{\reserved{where}} \\
{\reserved{with}} & & \\
\end{tabular}\\
The symbols used in GF are the following: \\
\begin{tabular}{lll}
{\symb{;}} &{\symb{{$=$}}} &{\symb{\{}} \\
{\symb{\}}} &{\symb{(}} &{\symb{)}} \\
{\symb{:}} &{\symb{{$-$}{$>$}}} &{\symb{**}} \\
{\symb{,}} &{\symb{[}} &{\symb{]}} \\
{\symb{.}} &{\symb{{$|$}}} &{\symb{\%}} \\
{\symb{?}} &{\symb{{$<$}}} &{\symb{{$>$}}} \\
{\symb{@}} &{\symb{!}} &{\symb{*}} \\
{\symb{$\backslash$}} &{\symb{{$=$}{$>$}}} &{\symb{{$+$}{$+$}}} \\
{\symb{{$+$}}} &{\symb{\_}} &{\symb{\$}} \\
{\symb{/}} &{\symb{{$-$}}} & \\
\end{tabular}\\
\subsection*{Comments}
Single-line comments begin with {\symb{{$-$}{$-$}}}. \\Multiple-line comments are enclosed with {\symb{\{{$-$}}} and {\symb{{$-$}\}}}.
\section*{The syntactic structure of GF}
Non-terminals are enclosed between $\langle$ and $\rangle$.
The symbols {\arrow} (production), {\delimit} (union)
and {\emptyP} (empty rule) belong to the BNF notation.
All other symbols are terminals.\\
\begin{tabular}{lll}
{\nonterminal{Grammar}} & {\arrow} &{\nonterminal{ListModDef}} \\
\end{tabular}\\
\begin{tabular}{lll}
{\nonterminal{ListModDef}} & {\arrow} &{\emptyP} \\
& {\delimit} &{\nonterminal{ModDef}} {\nonterminal{ListModDef}} \\
\end{tabular}\\
\begin{tabular}{lll}
{\nonterminal{ModDef}} & {\arrow} &{\nonterminal{ModDef}} {\terminal{;}} \\
& {\delimit} &{\terminal{grammar}} {\nonterminal{Ident}} {\terminal{{$=$}}} {\terminal{\{}} {\terminal{abstract}} {\terminal{{$=$}}} {\nonterminal{Ident}} {\terminal{;}} {\nonterminal{ListConcSpec}} {\terminal{\}}} \\
& {\delimit} &{\nonterminal{ComplMod}} {\nonterminal{ModType}} {\terminal{{$=$}}} {\nonterminal{ModBody}} \\
\end{tabular}\\
\begin{tabular}{lll}
{\nonterminal{ConcSpec}} & {\arrow} &{\nonterminal{Ident}} {\terminal{{$=$}}} {\nonterminal{ConcExp}} \\
\end{tabular}\\
\begin{tabular}{lll}
{\nonterminal{ListConcSpec}} & {\arrow} &{\emptyP} \\
& {\delimit} &{\nonterminal{ConcSpec}} \\
& {\delimit} &{\nonterminal{ConcSpec}} {\terminal{;}} {\nonterminal{ListConcSpec}} \\
\end{tabular}\\
\begin{tabular}{lll}
{\nonterminal{ConcExp}} & {\arrow} &{\nonterminal{Ident}} {\nonterminal{ListTransfer}} \\
\end{tabular}\\
\begin{tabular}{lll}
{\nonterminal{ListTransfer}} & {\arrow} &{\emptyP} \\
& {\delimit} &{\nonterminal{Transfer}} {\nonterminal{ListTransfer}} \\
\end{tabular}\\
\begin{tabular}{lll}
{\nonterminal{Transfer}} & {\arrow} &{\terminal{(}} {\terminal{transfer}} {\terminal{in}} {\nonterminal{Open}} {\terminal{)}} \\
& {\delimit} &{\terminal{(}} {\terminal{transfer}} {\terminal{out}} {\nonterminal{Open}} {\terminal{)}} \\
\end{tabular}\\
\begin{tabular}{lll}
{\nonterminal{ModType}} & {\arrow} &{\terminal{abstract}} {\nonterminal{Ident}} \\
& {\delimit} &{\terminal{resource}} {\nonterminal{Ident}} \\
& {\delimit} &{\terminal{interface}} {\nonterminal{Ident}} \\
& {\delimit} &{\terminal{concrete}} {\nonterminal{Ident}} {\terminal{of}} {\nonterminal{Ident}} \\
& {\delimit} &{\terminal{instance}} {\nonterminal{Ident}} {\terminal{of}} {\nonterminal{Ident}} \\
& {\delimit} &{\terminal{transfer}} {\nonterminal{Ident}} {\terminal{:}} {\nonterminal{Open}} {\terminal{{$-$}{$>$}}} {\nonterminal{Open}} \\
\end{tabular}\\
\begin{tabular}{lll}
{\nonterminal{ModBody}} & {\arrow} &{\nonterminal{Extend}} {\nonterminal{Opens}} {\terminal{\{}} {\nonterminal{ListTopDef}} {\terminal{\}}} \\
& {\delimit} &{\nonterminal{Ident}} {\terminal{with}} {\nonterminal{ListOpen}} \\
& {\delimit} &{\nonterminal{ListIdent}} {\terminal{**}} {\nonterminal{Ident}} {\terminal{with}} {\nonterminal{ListOpen}} \\
& {\delimit} &{\terminal{reuse}} {\nonterminal{Ident}} \\
& {\delimit} &{\terminal{union}} {\nonterminal{ListIncluded}} \\
\end{tabular}\\
\begin{tabular}{lll}
{\nonterminal{ListTopDef}} & {\arrow} &{\emptyP} \\
& {\delimit} &{\nonterminal{TopDef}} {\nonterminal{ListTopDef}} \\
\end{tabular}\\
\begin{tabular}{lll}
{\nonterminal{Extend}} & {\arrow} &{\nonterminal{ListIdent}} {\terminal{**}} \\
& {\delimit} &{\emptyP} \\
\end{tabular}\\
\begin{tabular}{lll}
{\nonterminal{ListOpen}} & {\arrow} &{\emptyP} \\
& {\delimit} &{\nonterminal{Open}} \\
& {\delimit} &{\nonterminal{Open}} {\terminal{,}} {\nonterminal{ListOpen}} \\
\end{tabular}\\
\begin{tabular}{lll}
{\nonterminal{Opens}} & {\arrow} &{\emptyP} \\
& {\delimit} &{\terminal{open}} {\nonterminal{ListOpen}} {\terminal{in}} \\
\end{tabular}\\
\begin{tabular}{lll}
{\nonterminal{Open}} & {\arrow} &{\nonterminal{Ident}} \\
& {\delimit} &{\terminal{(}} {\nonterminal{QualOpen}} {\nonterminal{Ident}} {\terminal{)}} \\
& {\delimit} &{\terminal{(}} {\nonterminal{QualOpen}} {\nonterminal{Ident}} {\terminal{{$=$}}} {\nonterminal{Ident}} {\terminal{)}} \\
\end{tabular}\\
\begin{tabular}{lll}
{\nonterminal{ComplMod}} & {\arrow} &{\emptyP} \\
& {\delimit} &{\terminal{incomplete}} \\
\end{tabular}\\
\begin{tabular}{lll}
{\nonterminal{QualOpen}} & {\arrow} &{\emptyP} \\
& {\delimit} &{\terminal{incomplete}} \\
& {\delimit} &{\terminal{interface}} \\
\end{tabular}\\
\begin{tabular}{lll}
{\nonterminal{ListIncluded}} & {\arrow} &{\emptyP} \\
& {\delimit} &{\nonterminal{Included}} \\
& {\delimit} &{\nonterminal{Included}} {\terminal{,}} {\nonterminal{ListIncluded}} \\
\end{tabular}\\
\begin{tabular}{lll}
{\nonterminal{Included}} & {\arrow} &{\nonterminal{Ident}} \\
& {\delimit} &{\nonterminal{Ident}} {\terminal{[}} {\nonterminal{ListIdent}} {\terminal{]}} \\
\end{tabular}\\
\begin{tabular}{lll}
{\nonterminal{Def}} & {\arrow} &{\nonterminal{ListName}} {\terminal{:}} {\nonterminal{Exp}} \\
& {\delimit} &{\nonterminal{ListName}} {\terminal{{$=$}}} {\nonterminal{Exp}} \\
& {\delimit} &{\nonterminal{Name}} {\nonterminal{ListPatt}} {\terminal{{$=$}}} {\nonterminal{Exp}} \\
& {\delimit} &{\nonterminal{ListName}} {\terminal{:}} {\nonterminal{Exp}} {\terminal{{$=$}}} {\nonterminal{Exp}} \\
\end{tabular}\\
\begin{tabular}{lll}
{\nonterminal{TopDef}} & {\arrow} &{\terminal{cat}} {\nonterminal{ListCatDef}} \\
& {\delimit} &{\terminal{fun}} {\nonterminal{ListFunDef}} \\
& {\delimit} &{\terminal{data}} {\nonterminal{ListFunDef}} \\
& {\delimit} &{\terminal{def}} {\nonterminal{ListDef}} \\
& {\delimit} &{\terminal{data}} {\nonterminal{ListDataDef}} \\
& {\delimit} &{\terminal{transfer}} {\nonterminal{ListDef}} \\
& {\delimit} &{\terminal{param}} {\nonterminal{ListParDef}} \\
& {\delimit} &{\terminal{oper}} {\nonterminal{ListDef}} \\
& {\delimit} &{\terminal{lincat}} {\nonterminal{ListPrintDef}} \\
& {\delimit} &{\terminal{lindef}} {\nonterminal{ListDef}} \\
& {\delimit} &{\terminal{lin}} {\nonterminal{ListDef}} \\
& {\delimit} &{\terminal{printname}} {\terminal{cat}} {\nonterminal{ListPrintDef}} \\
& {\delimit} &{\terminal{printname}} {\terminal{fun}} {\nonterminal{ListPrintDef}} \\
& {\delimit} &{\terminal{flags}} {\nonterminal{ListFlagDef}} \\
& {\delimit} &{\terminal{printname}} {\nonterminal{ListPrintDef}} \\
& {\delimit} &{\terminal{lintype}} {\nonterminal{ListDef}} \\
& {\delimit} &{\terminal{pattern}} {\nonterminal{ListDef}} \\
& {\delimit} &{\terminal{package}} {\nonterminal{Ident}} {\terminal{{$=$}}} {\terminal{\{}} {\nonterminal{ListTopDef}} {\terminal{\}}} {\terminal{;}} \\
& {\delimit} &{\terminal{var}} {\nonterminal{ListDef}} \\
& {\delimit} &{\terminal{tokenizer}} {\nonterminal{Ident}} {\terminal{;}} \\
\end{tabular}\\
\begin{tabular}{lll}
{\nonterminal{CatDef}} & {\arrow} &{\nonterminal{Ident}} {\nonterminal{ListDDecl}} \\
& {\delimit} &{\terminal{[}} {\nonterminal{Ident}} {\nonterminal{ListDDecl}} {\terminal{]}} \\
& {\delimit} &{\terminal{[}} {\nonterminal{Ident}} {\nonterminal{ListDDecl}} {\terminal{]}} {\terminal{\{}} {\nonterminal{Integer}} {\terminal{\}}} \\
\end{tabular}\\
\begin{tabular}{lll}
{\nonterminal{FunDef}} & {\arrow} &{\nonterminal{ListIdent}} {\terminal{:}} {\nonterminal{Exp}} \\
\end{tabular}\\
\begin{tabular}{lll}
{\nonterminal{DataDef}} & {\arrow} &{\nonterminal{Ident}} {\terminal{{$=$}}} {\nonterminal{ListDataConstr}} \\
\end{tabular}\\
\begin{tabular}{lll}
{\nonterminal{DataConstr}} & {\arrow} &{\nonterminal{Ident}} \\
& {\delimit} &{\nonterminal{Ident}} {\terminal{.}} {\nonterminal{Ident}} \\
\end{tabular}\\
\begin{tabular}{lll}
{\nonterminal{ListDataConstr}} & {\arrow} &{\emptyP} \\
& {\delimit} &{\nonterminal{DataConstr}} \\
& {\delimit} &{\nonterminal{DataConstr}} {\terminal{{$|$}}} {\nonterminal{ListDataConstr}} \\
\end{tabular}\\
\begin{tabular}{lll}
{\nonterminal{ParDef}} & {\arrow} &{\nonterminal{Ident}} {\terminal{{$=$}}} {\nonterminal{ListParConstr}} \\
& {\delimit} &{\nonterminal{Ident}} {\terminal{{$=$}}} {\terminal{(}} {\terminal{in}} {\nonterminal{Ident}} {\terminal{)}} \\
& {\delimit} &{\nonterminal{Ident}} \\
\end{tabular}\\
\begin{tabular}{lll}
{\nonterminal{ParConstr}} & {\arrow} &{\nonterminal{Ident}} {\nonterminal{ListDDecl}} \\
\end{tabular}\\
\begin{tabular}{lll}
{\nonterminal{PrintDef}} & {\arrow} &{\nonterminal{ListName}} {\terminal{{$=$}}} {\nonterminal{Exp}} \\
\end{tabular}\\
\begin{tabular}{lll}
{\nonterminal{FlagDef}} & {\arrow} &{\nonterminal{Ident}} {\terminal{{$=$}}} {\nonterminal{Ident}} \\
\end{tabular}\\
\begin{tabular}{lll}
{\nonterminal{ListDef}} & {\arrow} &{\nonterminal{Def}} {\terminal{;}} \\
& {\delimit} &{\nonterminal{Def}} {\terminal{;}} {\nonterminal{ListDef}} \\
\end{tabular}\\
\begin{tabular}{lll}
{\nonterminal{ListCatDef}} & {\arrow} &{\nonterminal{CatDef}} {\terminal{;}} \\
& {\delimit} &{\nonterminal{CatDef}} {\terminal{;}} {\nonterminal{ListCatDef}} \\
\end{tabular}\\
\begin{tabular}{lll}
{\nonterminal{ListFunDef}} & {\arrow} &{\nonterminal{FunDef}} {\terminal{;}} \\
& {\delimit} &{\nonterminal{FunDef}} {\terminal{;}} {\nonterminal{ListFunDef}} \\
\end{tabular}\\
\begin{tabular}{lll}
{\nonterminal{ListDataDef}} & {\arrow} &{\nonterminal{DataDef}} {\terminal{;}} \\
& {\delimit} &{\nonterminal{DataDef}} {\terminal{;}} {\nonterminal{ListDataDef}} \\
\end{tabular}\\
\begin{tabular}{lll}
{\nonterminal{ListParDef}} & {\arrow} &{\nonterminal{ParDef}} {\terminal{;}} \\
& {\delimit} &{\nonterminal{ParDef}} {\terminal{;}} {\nonterminal{ListParDef}} \\
\end{tabular}\\
\begin{tabular}{lll}
{\nonterminal{ListPrintDef}} & {\arrow} &{\nonterminal{PrintDef}} {\terminal{;}} \\
& {\delimit} &{\nonterminal{PrintDef}} {\terminal{;}} {\nonterminal{ListPrintDef}} \\
\end{tabular}\\
\begin{tabular}{lll}
{\nonterminal{ListFlagDef}} & {\arrow} &{\nonterminal{FlagDef}} {\terminal{;}} \\
& {\delimit} &{\nonterminal{FlagDef}} {\terminal{;}} {\nonterminal{ListFlagDef}} \\
\end{tabular}\\
\begin{tabular}{lll}
{\nonterminal{ListParConstr}} & {\arrow} &{\emptyP} \\
& {\delimit} &{\nonterminal{ParConstr}} \\
& {\delimit} &{\nonterminal{ParConstr}} {\terminal{{$|$}}} {\nonterminal{ListParConstr}} \\
\end{tabular}\\
\begin{tabular}{lll}
{\nonterminal{ListIdent}} & {\arrow} &{\nonterminal{Ident}} \\
& {\delimit} &{\nonterminal{Ident}} {\terminal{,}} {\nonterminal{ListIdent}} \\
\end{tabular}\\
\begin{tabular}{lll}
{\nonterminal{Name}} & {\arrow} &{\nonterminal{Ident}} \\
& {\delimit} &{\terminal{[}} {\nonterminal{Ident}} {\terminal{]}} \\
\end{tabular}\\
\begin{tabular}{lll}
{\nonterminal{ListName}} & {\arrow} &{\nonterminal{Name}} \\
& {\delimit} &{\nonterminal{Name}} {\terminal{,}} {\nonterminal{ListName}} \\
\end{tabular}\\
\begin{tabular}{lll}
{\nonterminal{LocDef}} & {\arrow} &{\nonterminal{ListIdent}} {\terminal{:}} {\nonterminal{Exp}} \\
& {\delimit} &{\nonterminal{ListIdent}} {\terminal{{$=$}}} {\nonterminal{Exp}} \\
& {\delimit} &{\nonterminal{ListIdent}} {\terminal{:}} {\nonterminal{Exp}} {\terminal{{$=$}}} {\nonterminal{Exp}} \\
\end{tabular}\\
\begin{tabular}{lll}
{\nonterminal{ListLocDef}} & {\arrow} &{\emptyP} \\
& {\delimit} &{\nonterminal{LocDef}} \\
& {\delimit} &{\nonterminal{LocDef}} {\terminal{;}} {\nonterminal{ListLocDef}} \\
\end{tabular}\\
\begin{tabular}{lll}
{\nonterminal{Exp4}} & {\arrow} &{\nonterminal{Ident}} \\
& {\delimit} &{\terminal{\{}} {\nonterminal{Ident}} {\terminal{\}}} \\
& {\delimit} &{\terminal{\%}} {\nonterminal{Ident}} {\terminal{\%}} \\
& {\delimit} &{\nonterminal{Sort}} \\
& {\delimit} &{\nonterminal{String}} \\
& {\delimit} &{\nonterminal{Integer}} \\
& {\delimit} &{\terminal{?}} \\
& {\delimit} &{\terminal{[}} {\terminal{]}} \\
& {\delimit} &{\terminal{data}} \\
& {\delimit} &{\terminal{[}} {\nonterminal{Ident}} {\nonterminal{Exps}} {\terminal{]}} \\
& {\delimit} &{\terminal{[}} {\nonterminal{String}} {\terminal{]}} \\
& {\delimit} &{\terminal{\{}} {\nonterminal{ListLocDef}} {\terminal{\}}} \\
& {\delimit} &{\terminal{{$<$}}} {\nonterminal{ListTupleComp}} {\terminal{{$>$}}} \\
& {\delimit} &{\terminal{(}} {\terminal{in}} {\nonterminal{Ident}} {\terminal{)}} \\
& {\delimit} &{\terminal{{$<$}}} {\nonterminal{Exp}} {\terminal{:}} {\nonterminal{Exp}} {\terminal{{$>$}}} \\
& {\delimit} &{\terminal{(}} {\nonterminal{Exp}} {\terminal{)}} \\
& {\delimit} &{\nonterminal{LString}} \\
\end{tabular}\\
\begin{tabular}{lll}
{\nonterminal{Exp3}} & {\arrow} &{\nonterminal{Exp3}} {\terminal{.}} {\nonterminal{Label}} \\
& {\delimit} &{\terminal{\{}} {\nonterminal{Ident}} {\terminal{.}} {\nonterminal{Ident}} {\terminal{\}}} \\
& {\delimit} &{\terminal{\%}} {\nonterminal{Ident}} {\terminal{.}} {\nonterminal{Ident}} {\terminal{\%}} \\
& {\delimit} &{\nonterminal{Exp4}} \\
\end{tabular}\\
\begin{tabular}{lll}
{\nonterminal{Exp2}} & {\arrow} &{\nonterminal{Exp2}} {\nonterminal{Exp3}} \\
& {\delimit} &{\terminal{table}} {\terminal{\{}} {\nonterminal{ListCase}} {\terminal{\}}} \\
& {\delimit} &{\terminal{table}} {\nonterminal{Exp4}} {\terminal{\{}} {\nonterminal{ListCase}} {\terminal{\}}} \\
& {\delimit} &{\terminal{table}} {\nonterminal{Exp4}} {\terminal{[}} {\nonterminal{ListExp}} {\terminal{]}} \\
& {\delimit} &{\terminal{case}} {\nonterminal{Exp}} {\terminal{of}} {\terminal{\{}} {\nonterminal{ListCase}} {\terminal{\}}} \\
& {\delimit} &{\terminal{variants}} {\terminal{\{}} {\nonterminal{ListExp}} {\terminal{\}}} \\
& {\delimit} &{\terminal{pre}} {\terminal{\{}} {\nonterminal{Exp}} {\terminal{;}} {\nonterminal{ListAltern}} {\terminal{\}}} \\
& {\delimit} &{\terminal{strs}} {\terminal{\{}} {\nonterminal{ListExp}} {\terminal{\}}} \\
& {\delimit} &{\nonterminal{Ident}} {\terminal{@}} {\nonterminal{Exp4}} \\
& {\delimit} &{\nonterminal{Exp3}} \\
& {\delimit} &{\terminal{Lin}} {\nonterminal{Ident}} \\
\end{tabular}\\
\begin{tabular}{lll}
{\nonterminal{Exp1}} & {\arrow} &{\nonterminal{Exp1}} {\terminal{!}} {\nonterminal{Exp2}} \\
& {\delimit} &{\nonterminal{Exp1}} {\terminal{*}} {\nonterminal{Exp2}} \\
& {\delimit} &{\nonterminal{Exp1}} {\terminal{**}} {\nonterminal{Exp2}} \\
& {\delimit} &{\nonterminal{Exp2}} \\
\end{tabular}\\
\begin{tabular}{lll}
{\nonterminal{Exp}} & {\arrow} &{\terminal{$\backslash$}} {\nonterminal{ListBind}} {\terminal{{$-$}{$>$}}} {\nonterminal{Exp}} \\
& {\delimit} &{\terminal{$\backslash$}} {\terminal{$\backslash$}} {\nonterminal{ListBind}} {\terminal{{$=$}{$>$}}} {\nonterminal{Exp}} \\
& {\delimit} &{\nonterminal{Decl}} {\terminal{{$-$}{$>$}}} {\nonterminal{Exp}} \\
& {\delimit} &{\nonterminal{Exp1}} {\terminal{{$=$}{$>$}}} {\nonterminal{Exp}} \\
& {\delimit} &{\nonterminal{Exp1}} {\terminal{{$+$}{$+$}}} {\nonterminal{Exp}} \\
& {\delimit} &{\nonterminal{Exp1}} {\terminal{{$+$}}} {\nonterminal{Exp}} \\
& {\delimit} &{\terminal{let}} {\terminal{\{}} {\nonterminal{ListLocDef}} {\terminal{\}}} {\terminal{in}} {\nonterminal{Exp}} \\
& {\delimit} &{\terminal{let}} {\nonterminal{ListLocDef}} {\terminal{in}} {\nonterminal{Exp}} \\
& {\delimit} &{\nonterminal{Exp1}} {\terminal{where}} {\terminal{\{}} {\nonterminal{ListLocDef}} {\terminal{\}}} \\
& {\delimit} &{\terminal{fn}} {\terminal{\{}} {\nonterminal{ListEquation}} {\terminal{\}}} \\
& {\delimit} &{\nonterminal{Exp1}} \\
\end{tabular}\\
\begin{tabular}{lll}
{\nonterminal{ListExp}} & {\arrow} &{\emptyP} \\
& {\delimit} &{\nonterminal{Exp}} \\
& {\delimit} &{\nonterminal{Exp}} {\terminal{;}} {\nonterminal{ListExp}} \\
\end{tabular}\\
\begin{tabular}{lll}
{\nonterminal{Exps}} & {\arrow} &{\emptyP} \\
& {\delimit} &{\nonterminal{Exp4}} {\nonterminal{Exps}} \\
\end{tabular}\\
\begin{tabular}{lll}
{\nonterminal{Patt1}} & {\arrow} &{\terminal{\_}} \\
& {\delimit} &{\nonterminal{Ident}} \\
& {\delimit} &{\terminal{\{}} {\nonterminal{Ident}} {\terminal{\}}} \\
& {\delimit} &{\nonterminal{Ident}} {\terminal{.}} {\nonterminal{Ident}} \\
& {\delimit} &{\nonterminal{Integer}} \\
& {\delimit} &{\nonterminal{String}} \\
& {\delimit} &{\terminal{\{}} {\nonterminal{ListPattAss}} {\terminal{\}}} \\
& {\delimit} &{\terminal{{$<$}}} {\nonterminal{ListPattTupleComp}} {\terminal{{$>$}}} \\
& {\delimit} &{\terminal{(}} {\nonterminal{Patt}} {\terminal{)}} \\
\end{tabular}\\
\begin{tabular}{lll}
{\nonterminal{Patt}} & {\arrow} &{\nonterminal{Ident}} {\nonterminal{ListPatt}} \\
& {\delimit} &{\nonterminal{Ident}} {\terminal{.}} {\nonterminal{Ident}} {\nonterminal{ListPatt}} \\
& {\delimit} &{\nonterminal{Patt1}} \\
\end{tabular}\\
\begin{tabular}{lll}
{\nonterminal{PattAss}} & {\arrow} &{\nonterminal{ListIdent}} {\terminal{{$=$}}} {\nonterminal{Patt}} \\
\end{tabular}\\
\begin{tabular}{lll}
{\nonterminal{Label}} & {\arrow} &{\nonterminal{Ident}} \\
& {\delimit} &{\terminal{\$}} {\nonterminal{Integer}} \\
\end{tabular}\\
\begin{tabular}{lll}
{\nonterminal{Sort}} & {\arrow} &{\terminal{Type}} \\
& {\delimit} &{\terminal{PType}} \\
& {\delimit} &{\terminal{Tok}} \\
& {\delimit} &{\terminal{Str}} \\
& {\delimit} &{\terminal{Strs}} \\
\end{tabular}\\
\begin{tabular}{lll}
{\nonterminal{ListPattAss}} & {\arrow} &{\emptyP} \\
& {\delimit} &{\nonterminal{PattAss}} \\
& {\delimit} &{\nonterminal{PattAss}} {\terminal{;}} {\nonterminal{ListPattAss}} \\
\end{tabular}\\
\begin{tabular}{lll}
{\nonterminal{PattAlt}} & {\arrow} &{\nonterminal{Patt}} \\
\end{tabular}\\
\begin{tabular}{lll}
{\nonterminal{ListPatt}} & {\arrow} &{\nonterminal{Patt1}} \\
& {\delimit} &{\nonterminal{Patt1}} {\nonterminal{ListPatt}} \\
\end{tabular}\\
\begin{tabular}{lll}
{\nonterminal{ListPattAlt}} & {\arrow} &{\nonterminal{PattAlt}} \\
& {\delimit} &{\nonterminal{PattAlt}} {\terminal{{$|$}}} {\nonterminal{ListPattAlt}} \\
\end{tabular}\\
\begin{tabular}{lll}
{\nonterminal{Bind}} & {\arrow} &{\nonterminal{Ident}} \\
& {\delimit} &{\terminal{\_}} \\
\end{tabular}\\
\begin{tabular}{lll}
{\nonterminal{ListBind}} & {\arrow} &{\emptyP} \\
& {\delimit} &{\nonterminal{Bind}} \\
& {\delimit} &{\nonterminal{Bind}} {\terminal{,}} {\nonterminal{ListBind}} \\
\end{tabular}\\
\begin{tabular}{lll}
{\nonterminal{Decl}} & {\arrow} &{\terminal{(}} {\nonterminal{ListBind}} {\terminal{:}} {\nonterminal{Exp}} {\terminal{)}} \\
& {\delimit} &{\nonterminal{Exp2}} \\
\end{tabular}\\
\begin{tabular}{lll}
{\nonterminal{TupleComp}} & {\arrow} &{\nonterminal{Exp}} \\
\end{tabular}\\
\begin{tabular}{lll}
{\nonterminal{PattTupleComp}} & {\arrow} &{\nonterminal{Patt}} \\
\end{tabular}\\
\begin{tabular}{lll}
{\nonterminal{ListTupleComp}} & {\arrow} &{\emptyP} \\
& {\delimit} &{\nonterminal{TupleComp}} \\
& {\delimit} &{\nonterminal{TupleComp}} {\terminal{,}} {\nonterminal{ListTupleComp}} \\
\end{tabular}\\
\begin{tabular}{lll}
{\nonterminal{ListPattTupleComp}} & {\arrow} &{\emptyP} \\
& {\delimit} &{\nonterminal{PattTupleComp}} \\
& {\delimit} &{\nonterminal{PattTupleComp}} {\terminal{,}} {\nonterminal{ListPattTupleComp}} \\
\end{tabular}\\
\begin{tabular}{lll}
{\nonterminal{Case}} & {\arrow} &{\nonterminal{ListPattAlt}} {\terminal{{$=$}{$>$}}} {\nonterminal{Exp}} \\
\end{tabular}\\
\begin{tabular}{lll}
{\nonterminal{ListCase}} & {\arrow} &{\nonterminal{Case}} \\
& {\delimit} &{\nonterminal{Case}} {\terminal{;}} {\nonterminal{ListCase}} \\
\end{tabular}\\
\begin{tabular}{lll}
{\nonterminal{Equation}} & {\arrow} &{\nonterminal{ListPatt}} {\terminal{{$-$}{$>$}}} {\nonterminal{Exp}} \\
\end{tabular}\\
\begin{tabular}{lll}
{\nonterminal{ListEquation}} & {\arrow} &{\emptyP} \\
& {\delimit} &{\nonterminal{Equation}} \\
& {\delimit} &{\nonterminal{Equation}} {\terminal{;}} {\nonterminal{ListEquation}} \\
\end{tabular}\\
\begin{tabular}{lll}
{\nonterminal{Altern}} & {\arrow} &{\nonterminal{Exp}} {\terminal{/}} {\nonterminal{Exp}} \\
\end{tabular}\\
\begin{tabular}{lll}
{\nonterminal{ListAltern}} & {\arrow} &{\emptyP} \\
& {\delimit} &{\nonterminal{Altern}} \\
& {\delimit} &{\nonterminal{Altern}} {\terminal{;}} {\nonterminal{ListAltern}} \\
\end{tabular}\\
\begin{tabular}{lll}
{\nonterminal{DDecl}} & {\arrow} &{\terminal{(}} {\nonterminal{ListBind}} {\terminal{:}} {\nonterminal{Exp}} {\terminal{)}} \\
& {\delimit} &{\nonterminal{Exp4}} \\
\end{tabular}\\
\begin{tabular}{lll}
{\nonterminal{ListDDecl}} & {\arrow} &{\emptyP} \\
& {\delimit} &{\nonterminal{DDecl}} {\nonterminal{ListDDecl}} \\
\end{tabular}\\
\begin{tabular}{lll}
{\nonterminal{OldGrammar}} & {\arrow} &{\nonterminal{Include}} {\nonterminal{ListTopDef}} \\
\end{tabular}\\
\begin{tabular}{lll}
{\nonterminal{Include}} & {\arrow} &{\emptyP} \\
& {\delimit} &{\terminal{include}} {\nonterminal{ListFileName}} \\
\end{tabular}\\
\begin{tabular}{lll}
{\nonterminal{FileName}} & {\arrow} &{\nonterminal{String}} \\
& {\delimit} &{\nonterminal{Ident}} \\
& {\delimit} &{\terminal{/}} {\nonterminal{FileName}} \\
& {\delimit} &{\terminal{.}} {\nonterminal{FileName}} \\
& {\delimit} &{\terminal{{$-$}}} {\nonterminal{FileName}} \\
& {\delimit} &{\nonterminal{Ident}} {\nonterminal{FileName}} \\
\end{tabular}\\
\begin{tabular}{lll}
{\nonterminal{ListFileName}} & {\arrow} &{\nonterminal{FileName}} {\terminal{;}} \\
& {\delimit} &{\nonterminal{FileName}} {\terminal{;}} {\nonterminal{ListFileName}} \\
\end{tabular}\\
\end{document}

Binary file not shown.

Before

Width:  |  Height:  |  Size: 20 KiB

View File

@@ -1,75 +0,0 @@
digraph {
size = "12,8" ;
Lang [style = "solid", shape = "ellipse", URL = "Lang.gf"];
Lang -> Grammar [style = "solid"];
Lang -> Lexicon [style = "solid"];
Grammar [style = "solid", shape = "ellipse", URL = "Lang.gf"];
Grammar -> Noun [style = "solid"];
Grammar -> Verb [style = "solid"];
Grammar -> Adjective [style = "solid"];
Grammar -> Adverb [style = "solid"];
Grammar -> Numeral [style = "solid"];
Grammar -> Sentence [style = "solid"];
Grammar -> Question [style = "solid"];
Grammar -> Relative [style = "solid"];
Grammar -> Conjunction [style = "solid"];
Grammar -> Phrase [style = "solid"];
Grammar -> Text [style = "solid"];
Grammar -> Idiom [style = "solid"];
Grammar -> Structural [style = "solid"];
Noun [style = "solid", shape = "ellipse", URL = "Noun.gf"];
Noun -> Cat [style = "solid"];
Verb [style = "solid", shape = "ellipse", URL = "Verb.gf"];
Verb -> Cat [style = "solid"];
Adjective [style = "solid", shape = "ellipse", URL = "Adjective.gf"];
Adjective -> Cat [style = "solid"];
Adverb [style = "solid", shape = "ellipse", URL = "Adverb.gf"];
Adverb -> Cat [style = "solid"];
Numeral [style = "solid", shape = "ellipse", URL = "Numeral.gf"];
Numeral -> Cat [style = "solid"];
Sentence [style = "solid", shape = "ellipse", URL = "Sentence.gf"];
Sentence -> Cat [style = "solid"];
Question [style = "solid", shape = "ellipse", URL = "Question.gf"];
Question -> Cat [style = "solid"];
Relative [style = "solid", shape = "ellipse", URL = "Relative.gf"];
Relative -> Cat [style = "solid"];
Conjunction [style = "solid", shape = "ellipse", URL = "Conjunction.gf"];
Conjunction -> Cat [style = "solid"];
Phrase [style = "solid", shape = "ellipse", URL = "Phrase.gf"];
Phrase -> Cat [style = "solid"];
Text [style = "solid", shape = "ellipse", URL = "Phrase.gf"];
Text -> Cat [style = "solid"];
Idiom [style = "solid", shape = "ellipse", URL = "Phrase.gf"];
Idiom -> Cat [style = "solid"];
Structural [style = "solid", shape = "ellipse", URL = "Structural.gf"];
Structural -> Cat [style = "solid"];
Lexicon [style = "solid", shape = "ellipse", URL = "Lexicon.gf"];
Lexicon -> Cat [style = "solid"];
Cat [style = "solid", shape = "ellipse", URL = "Cat.gf"];
Cat -> Common [style = "solid"];
Common [style = "solid", shape = "ellipse", URL = "Tense.gf"];
}

Binary file not shown.

Before

Width:  |  Height:  |  Size: 77 KiB

View File

@@ -1,231 +0,0 @@
* Some notes on the syntax of this file, making it possible to use todoo-mode.el:
- Items start with "* "
- Sub-items start with "- "
- It should be noted somewhere in the item, who has reported the item
Suggestion: Add "[who]" at the beginning of the item title
(then one can use "assign item" in todoo-mode)
- Each item should have a priority
Suggestion: Add "URGENT", "IMPORTANT" or "WISH" at the beginning of
the item title
- Sort the items in priority order
(todoo-mode can move an item up or down)
----------------------------------------------------------------------
* [peb] URGENT: Error messages for syntax errors
When a syntax error is reported, it should be noted which file it
occurs in. Otherwise it is impossible to know where the error is
(if one uses the -s flag):
> i -s Domain/MP3/Domain_MP_Semantics.gf
syntax error at line 33 before ve , Proposition ,
There's no problem with other kinds of errors:
> i -s Domain/MP3/Domain_MP_Semantics.gf
checking module Godis_Semantics
Happened in linearization of userMove :
product expected instead of {
pl : Str
}
* [peb] IMPORTANT: Add the -path of a module to daughter modules
Then the main module does not have to know where all grandchildren are:
file A.gf:
abstract A = B ** {...}
file B.gf:
--# -path=./resource
abstract B = Lang ** {...}
I.e.: the file A.gf should not need to know that B.gf uses the
resource library.
* [peb] IMPORTANT: incomplete concrete and interfaces
- The following works in GF:
incomplete concrete TestDI of TestA = open (C=TestCI) in {
lincat A = TestCI.A ** {p : Str};
lin f = TestCI.f ** {p = "f"};
g = TestCI.g ** {p = "g"};
}
> i -src TestDE.gf
- BUT, if we exchange "TestCI" for "C" we get an error:
incomplete concrete TestDI of TestA = open (C=TestCI) in {
lincat A = C.A ** {p : Str};
lin f = C.f ** {p = "f"};
g = C.g ** {p = "g"};
}
> i -src TestDE.gf
compiling TestDE.gf... failed to find C
OCCURRED IN
atomic term C given TestCE TestCI TestCE TestDE
OCCURRED IN
renaming definition of f
OCCURRED IN
renaming module TestDE
- the other modules:
abstract TestA = {
cat A;
fun f, g : A;
}
instance TestBE of TestBI = {
oper hello = "hello";
bye = "bye";
}
interface TestBI = {
oper hello : Str;
bye : Str;
}
concrete TestCE of TestA = TestCI with (TestBI = TestBE);
incomplete concrete TestCI of TestA = open TestBI in {
lincat A = {s : Str};
lin f = {s = hello};
g = {s = bye};
}
concrete TestDE of TestA = TestDI with (TestCI = TestCE);
* [peb] IMPORTANT: Missing things in the help command
> h -printer
(the flag -printer=cfgm is missing)
> h -cat
WARNING: invalid option: cat
> h -lang
WARNING: invalid option: lang
> h -language
WARNING: invalid option: language
> h -parser
WARNING: invalid option: parser
> h -aslkdjaslkdjss
WARNING: invalid option: aslkdjaslkdjss
Command not found.
(it should note: "option not found")
> h -optimize
WARNING: invalid option: optimize
> h -startcat
WARNING: invalid option: startcat
> h h
h, help: h Command?
(it should also mention "h -option")
* [peb] IMPORTANT: Set GF_LIB_PATH within GF
> sf libpath=~/GF/lib
* [peb] IMPORTANT: Set the starting category with "sf"
> sf startcat=X
* [peb] IMPORTANT: import-flags
- There are some inconsistencies when importing grammars:
1. when doing "pg -printer=cfg", one must have used "i -conversion=finite",
since "pg" doesn't care about the flags that are set in the grammar file
2. when doing "pm -printer=cfgm", one must have set the flag
"conversion=finite" within the grammar file, since "pm" doesn't
care about the flags to the import command
(I guess it's me (peb) who should fix this, but I don't know where
the different flags reside...)
- Also, it must be decided in what cases flags can override other flags:
a) in the grammar file, e.g. "flags conversion=finite;"
b) on the command line, e.g. "> sf conversion=finite"
c) as argument to a command, e.g. "> i -conversion=finite file.gf"
- A related issue is to decide the scope of flags:
Some flags are (or should be) local to the module
(e.g. -coding and -path)
Other flags override daughter flags for daughter modules
(e.g. -startcat and -conversion)
* [bringert] IMPORTANT: get right startcat flag when printing CFGM
GF.CFGM.PrintCFGrammar.prCanonAsCFGM currently only gets the startcat
flag from the top-level concrete module. This might be easier
to fix if the multi grammar printers had access to more than just
the CanonGrammar.
* [peb] WISH: generalizing incomplete concrete
I want to be able to open an incomplete concrete module
inside another incomplete concrete.
Then I can instantiate both incompletes at the same time.
* [peb] WISH: _tmpi, _tmpo
The files _tmpi and _tmpo are never removed when quitting GF.
Further suggestion: put them in /tmp or similar.
peb: när man använder "|" till ett systemanrop, t.ex:
pg | ! sort
så skapas filerna _tmpi och _tmpo. Men de tas aldrig bort.
peb: Ännu bättre: ta bort filerna efteråt.
aarne: Sant: när GF quittas (om detta inte sker onormalt).
Eller när kommandot har kört färdigt (om det terminerar).
peb: Bäst(?): skapa filerna i /tmp eller liknande.
aarne: Ibland får man skrivrättighetsproblem - och det är
inte kul om man måste ange en tmp-path. Och olika
användare och gf-processer måste ha unika filnamn.
Och vet inte hur det funkar på windows...
aarne: Ett till alternativ skulle vara att använda handles
utan några tmp-filer alls. Men jag har inte hunnit
ta reda på hur det går till.
björn: Lite slumpmässiga tankar:
+ man kan använda System.Directory.getTemporaryDirectory, så slipper man iaf bry sig om olika plattformsproblem.
+ sen kan man använda System.IO.openTempFile för att skapa en temporär fil. Den tas dock inte bort när programmet avslutas, så det får man fixa själv.
+ System.Posix.Temp.mkstemp gör nåt liknande, men dokumentationen är dålig.
+ biblioteket HsShellScript har lite funktioner för sånt här, se
http://www.volker-wysk.de/hsshellscript/apidoc/HsShellScript.html#16
* [peb] WISH: Hierarchic modules
Suggestion by peb:
The module A.B.C is located in the file A/B/C.gf
Main advantage: you no longer need to state "--# -path=..." in
modules
- How can this be combined with several modules inside one file?

View File

@@ -1,750 +0,0 @@
Compiling GF
Aarne Ranta
Proglog meeting, 1 November 2006
% to compile: txt2tags -thtml compiling-gf.txt ; htmls compiling-gf.html
%!target:html
%!postproc(html): #NEW <!-- NEW -->
#NEW
==The compilation task==
GF is a grammar formalism, i.e. a special purpose programming language
for writing grammars.
Other grammar formalisms:
- BNF, YACC, Happy (grammars for programming languages);
- PATR, HPSG, LFG (grammars for natural languages).
The grammar compiler prepares a GF grammar for two computational tasks:
- linearization: take syntax trees to strings
- parsing: take strings to syntax trees
The grammar gives a declarative description of these functionalities,
on a high abstraction level that improves grammar writing
productivity.
For efficiency, the grammar is compiled to lower-level formats.
Type checking is another essential compilation phase. Its purpose is
twofold, as usual:
- checking the correctness of the grammar
- type-annotating expressions for code generation
#NEW
==Characteristics of GF language==
Functional language with types, both built-in and user-defined.
```
Str : Type
param Number = Sg | Pl
param AdjForm = ASg Gender | APl
Noun : Type = {s : Number => Str ; g : Gender}
```
Pattern matching.
```
svart_A = table {
ASg _ => "svart" ;
_ => "svarta"
}
```
Higher-order functions.
Dependent types.
```
flip : (a, b, c : Type) -> (a -> b -> c) -> b -> a -> c =
\_,_,_,f,y,x -> f x y ;
```
#NEW
==The module system of GF==
Main division: abstract syntax and concrete syntax
```
abstract Greeting = {
cat Greet ;
fun Hello : Greet ;
}
concrete GreetingEng of Greeting = {
lincat Greet = {s : Str} ;
lin Hello = {s = "hello"} ;
}
concrete GreetingIta of Greeting = {
param Politeness = Familiar | Polite ;
lincat Greet = {s : Politeness => Str} ;
lin Hello = {s = table {
Familiar => "ciao" ;
Polite => "buongiorno"
} ;
}
```
Other features of the module system:
- extension and opening
- parametrized modules (cf. ML: signatures, structures, functors)
#NEW
==GF vs. Haskell==
Some things that (standard) Haskell hasn't:
- records and record subtyping
- regular expression patterns
- dependent types
- ML-style modules
Some things that GF hasn't:
- infinite (recursive) data types
- recursive functions
- classes, polymorphism
#NEW
==GF vs. most linguistic grammar formalisms==
GF separates abstract syntax from concrete syntax.
GF has a module system with separate compilation.
GF is generation-oriented (as opposed to parsing).
GF has unidirectional matching (as opposed to unification).
GF has a static type system (as opposed to a type-free universe).
"I was - and I still am - firmly convinced that a program composed
out of statically type-checked parts is more likely to faithfully
express a well-thought-out design than a program relying on
weakly-typed interfaces or dynamically-checked interfaces."
(B. Stroustrup, 1994, p. 107)
#NEW
==The computation model: abstract syntax==
An abstract syntax defines a free algebra of trees (using
dependent types, recursion, higher-order abstract syntax:
GF includes a complete Logical Framework).
```
cat C (x_1 : A_1)...(x_n : A_n)
a_1 : A_1
...
a_n : A_n{x_1 : A_1,...,x_n-1 : A_n-1}
----------------------------------------------------
(C a_1 ... a_n) : Type
fun f : (x_1 : A_1) -> ... -> (x_n : A_n) -> A
a_1 : A_1
...
a_n : A_n{x_1 : A_1,...,x_n-1 : A_n-1}
----------------------------------------------------
(f a_1 ... a_n) : A{x_1 : A_1,...,x_n : A_n}
A : Type x : A |- B : Type x : A |- b : B f : (x : A) -> B a : A
---------------------------- ---------------------- ------------------------
(x : A) -> B : Type \x -> b : (x : A) -> B f a : B{x := a}
```
Notice that all syntax trees are in eta-long form.
#NEW
==The computation model: concrete syntax==
A concrete syntax defines a homomorphism (compositional mapping)
from the abstract syntax to a system of concrete syntax objects.
```
cat C _
--------------------
lincat C = C* : Type
fun f : (x_1 : A_1) -> ... -> (x_n : A_n) -> A
-----------------------------------------------
lin f = f* : A_1* -> ... -> A_n* -> A*
(f a_1 ... a_n)* = f* a_1* ... a_n*
```
The homomorphism can as such be used as linearization function.
It is a functional program, but a restricted one, since it works
in the end on finite data structures only.
But a more efficient program is obtained via compilation to
GFC = Canonical GF: the "machine code" of GF.
The parsing problem of GFC can be reduced to that of MPCFG (Multiple
Parallel Context Free Grammars), see P. Ljunglöf's thesis (2004).
#NEW
==The core type system of concrete syntax: basic types==
```
param P P : PType
PType : Type --------- ---------
P : PType P : Type
s : Str t : Str
Str : type "foo" : Str [] : Str ----------------
s ++ t : Str
```
#NEW
==The core type system of concrete syntax: functions and tables==
```
A : Type x : A |- B : Type x : A |- b : B f : (x : A) -> B a : A
---------------------------- ---------------------- ------------------------
(x : A) -> B : Type \x -> b : (x : A) -> B f a : B{x := a}
P : PType A : Type t : P => A p : P
-------------------- -----------------
P => A : Type t ! p : A
v_1,...,v_n : A
---------------------------------------------- P = {C_1,...,C_n}
table {C_1 => v_1 ; ... ; C_n => v_n} : P => A
```
Pattern matching is treated as an abbreviation for tables. Notice that
```
case e of {...} == table {...} ! e
```
#NEW
==The core type system of concrete syntax: records==
```
A_1,...,A_n : Type
------------------------------------ n >= 0
{r_1 : A_1 ; ... ; r_n : A_n} : Type
a_1 : A_1 ... a_n : A_n
------------------------------------------------------------
{r_1 = a_1 ; ... ; r_n = a_n} : {r_1 : A_1 ; ... ; r_n : A_n}
r : {r_1 : A_1 ; ... ; r_n : A_n}
----------------------------------- i = 1,...,n
r.r_i : A_i
```
Subtyping: if ``r : R ** {r : A}`` then ``r : R``
#NEW
==Computation rules==
```
(\x -> b) a = b{x := a}
(table {C_1 => v_1 ; ... ; C_n => v_n} : P => A) ! C_i = v_i
{r_1 = a_1 ; ... ; r_n = a_n}.r_i = a_i
```
#NEW
==Canonical GF==
Concrete syntax type system:
```
A_1 : Type ... A_n : Type
Str : Type Int : Type ------------------------- $i : A
[A_1, ..., A_n] : Type
a_1 : A_1 ... a_n : A_n t : [A_1, ..., A_n]
--------------------------------- ------------------- i = 1,..,n
[a_1, ..., a_n] : [A_1, ..., A_n] t ! i : A_i
```
Tuples represent both records and tables.
There are no functions.
Linearization:
```
lin f = f*
(f a_1 ... a_n)* = f*{$1 = a_1*, ..., $n = a_n*}
```
#NEW
==The compilation task, again==
1. From a GF source grammar, derive a canonical GF grammar.
2. From the canonical GF grammar derive an MPCFG grammar
The canonical GF grammar can be used for linearization, with
linear time complexity (w.r.t. the size of the tree).
The MPCFG grammar can be used for parsing, with (unbounded)
polynomial time complexity (w.r.t. the size of the string).
For these target formats, we have also built interpreters in
different programming languages (C, C++, Haskell, Java, Prolog).
Moreover, we generate supplementary formats such as grammars
required by various speech recognition systems.
#NEW
==An overview of compilation phases==
Legend:
- ellipse node: representation saved in a file
- plain text node: internal representation
- solid arrow or ellipse: essential phase or format
- dashed arrow or ellipse: optional phase or format
- arrow label: the module implementing the phase
[gf-compiler.png]
#NEW
==Using the compiler==
Batch mode (cf. GHC).
Interactive mode, building the grammar incrementally from
different files, with the possibility of testing them
(cf. GHCI).
The interactive mode was first, built on the model of ALF-2
(L. Magnusson), and there was no file output of compiled
grammars.
#NEW
==Modules and separate compilation==
The above diagram shows what happens to each module.
(But not quite, since some of the back-end formats must be
built for sets of modules: GFCC and the parser formats.)
When the grammar compiler is called, it has a main module as its
argument. It then builds recursively a dependency graph with all
the other modules, and decides which ones must be recompiled.
The behaviour is rather similar to GHC.
Separate compilation is //extremely important// when developing
big grammars, especially when using grammar libraries. Example: compiling
the GF resource grammar library takes 5 minutes, whereas reading
in the compiled image takes 10 seconds.
#NEW
==Module dependencies and recompilation==
(For later use, not for the Proglog talk)
For each module M, there are 3 kinds of files:
- M.gf, source file
- M.gfc, compiled file ("object file")
- M.gfr, type-checked and optimized source file (for resource modules only)
The compiler reads gf files and writes gfc files (and gfr files if appropriate)
The Main module is the one used as argument when calling GF.
A module M (immediately) depends on the module K, if either
- M is a concrete of K
- M is an instance of K
- M extends K
- M opens K
- M is a completion of K with something
- M is a completion of some module with K instantiated with something
A module M (transitively) depends on the module K, if either
- M immediately depends on K
- M depends on some L such that L immediately depends on K
Immediate dependence is readable from the module header without parsing
the whole module.
The compiler reads recursively the headers of all modules that Main depends on.
These modules are arranged in a dependency graph, which is checked to be acyclic.
To decide whether a module M has to be compiled, do:
+ Get the time stamps t() of M.gf and M.gfc (if a file doesn't exist, its
time is minus infinity).
+ If t(M.gf) > t(M.gfc), M must be compiled.
+ If M depends on K and K must be compiled, then M must be compiled.
+ If M depends on K and t(K.gf) > t(M.gfc), then M must be compiled.
Decorate the dependency graph by information on whether the gf or the gfc (and gfr)
format is to be read.
Topologically sort the decorated graph, and read each file in the chosen format.
The gfr file is generated for these module types only:
- resource
- instance
When reading K.gfc, also K.gfr is read if some M depending on K has to be compiled.
In other cases, it is enough to read K.gfc.
In an interactive GF session, some modules may be in memory already.
When read to the memory, each module M is given time stamp t(M.m).
The additional rule now is:
- If M.gfc is to be read, and t(M.m) > t(M.gfc), don't read M.gfc.
#NEW
==Techniques used==
The compiler is written in Haskell, with some C foreign function calls
in the interactive version (readline, killing threads).
BNFC is used for generating both the parsers and printers.
This has helped to make the formats portable.
"Almost compositional functions" (``composOp``) are used in
many compiler passes, making them easier to write and understand.
A ``grep`` on the sources reveals 40 uses (outside the definition
of ``composOp`` itself).
The key algorithmic ideas are
- type-driven partial evaluation in GF-to-GFC generation
- common subexpression elimination as back-end optimization
- some ideas in GFC-to-MCFG encoding
#NEW
==Type-driven partial evaluation==
Each abstract syntax category in GF has a corresponding linearization type:
```
cat C
lincat C = T
```
The general form of a GF rule pair is
```
fun f : C1 -> ... -> Cn -> C
lin f = t
```
with the typing condition following the ``lincat`` definitions
```
t : T1 -> ... -> Tn -> T
```
The term ``t`` is in general built by using abstraction methods such
as pattern matching, higher-order functions, local definitions,
and library functions.
The compilation technique proceeds as follows:
- use eta-expansion on ``t`` to determine the canonical form of the term
```
\ $C1, ...., $Cn -> (t $C1 .... $Cn)
```
with unique variables ``$C1 .... $Cn`` for the arguments; repeat this
inside the term for records and tables
- evaluate the resulting term using the computation rules of GF
- what remains is a canonical term with ``$C1 .... $Cn`` the only
variables (the run-time input of the linearization function)
#NEW
==Eta-expanding records and tables==
For records that are valid via subtyping, eta expansion
eliminates superfluous fields:
```
{r1 = t1 ; r2 = t2} : {r1 : T1} ----> {r1 = t1}
```
For tables, the effect is always expansion, since
pattern matching can be used to represent tables
compactly:
```
table {n => "fish"} : Number => Str --->
table {
Sg => "fish" ;
Pl => "fish"
}
```
This can be helped by back-end optimizations (see below).
#NEW
==Eliminating functions==
"Everything is finite": parameter types, records, tables;
finite number of string tokens per grammar.
But "infinite types" such as function types are useful when
writing grammars, to enable abstractions.
Since function types do not appear in linearization types,
we want functions to be eliminated from linearization terms.
This is similar to the **subformula property** in logic.
Also the main problem is similar: function depending on
a run-time variable,
```
(table {P => f ; Q => g} ! x) a
```
This is not a redex, but we can make it closer to one by moving
the application inside the table,
```
table {P => f a ; Q => g a} ! x
```
This transformation is the same as Prawitz's (1965) elimination
of maximal segments in natural deduction:
```
A B
C -> D C C -> D C
A B --------- ---------
A v B C -> D C -> D A v B D D
--------------------- ===> -------------------------
C -> D C D
--------------------
D
```
#NEW
==Size effects of partial evaluation==
Irrelevant table branches are thrown away, which can reduce the size.
But, since tables are expanded and auxiliary functions are inlined,
the size can grow exponentially.
How can we keep the first property and eliminate the second?
#NEW
==Parametrization of tables==
Algorithm: for each branch in a table, consider replacing the
argument by a variable:
```
table { table {
P => t ; ---> x => t[P->x] ;
Q => u x => u[Q->x]
} }
```
If the resulting branches are all equal, you can replace the table
by a lambda abstract
```
\\x => t[P->x]
```
If each created variable ``x`` is unique in the grammar, computation
with the lambda abstract is efficient.
#NEW
==Course-of-values tables==
By maintaining a canonical order of parameters in a type, we can
eliminate the left hand sides of branches.
```
table { table T [
P => t ; ---> t ;
Q => u u
} ]
```
The treatment is similar to ``Enum`` instances in Haskell.
In the end, all parameter types can be translated to
initial segments of integers.
#NEW
==Common subexpression elimination==
Algorithm:
+ Go through all terms and subterms in a module, creating
a symbol table mapping terms to the number of occurrences.
+ For each subterm appearing at least twice, create a fresh
constant defined as that subterm.
+ Go through all rules (incl. rules for the new constants),
replacing largest possible subterms with such new constants.
This algorithm, in a way, creates the strongest possible abstractions.
In general, the new constants have open terms as definitions.
But since all variables (and constants) are unique, they can
be computed by simple replacement.
#NEW
==Size effects of optimizations==
Example: the German resource grammar
``LangGer``
|| optimization | lines | characters | size % | blow-up |
| none | 5394 | 3208435 | 100 | 25 |
| all | 5394 | 750277 | 23 | 6 |
| none_subs | 5772 | 1290866 | 40 | 10 |
| all_subs | 5644 | 414119 | 13 | 3 |
| gfcc | 3279 | 190004 | 6 | 1.5 |
| gf source | 3976 | 121939 | 4 | 1 |
Optimization "all" means parametrization + course-of-values.
The source code size is an estimate, since it includes
potentially irrelevant library modules, and comments.
The GFCC format is not reusable in separate compilation.
#NEW
==The shared prefix optimization==
This is currently performed in GFCC only.
The idea works for languages that have a rich morphology
based on suffixes. Then we can replace a course of values
with a pair of a prefix and a suffix set:
```
["apa", "apan", "apor", "aporna"] --->
("ap" + ["a", "an", "or", "orna"])
```
The real gain comes via common subexpression elimination:
```
_34 = ["a", "an", "or", "orna"]
apa = ("ap" + _34)
blomma = ("blomm" + _34)
flicka = ("flick" + _34)
```
Notice that it now matters a lot how grammars are written.
For instance, if German verbs are treated as a one-dimensional
table,
```
["lieben", "liebe", "liebst", ...., "geliebt", "geliebter",...]
```
no shared prefix optimization is possible. A better form is
separate tables for non-"ge" and "ge" forms:
```
[["lieben", "liebe", "liebst", ....], ["geliebt", "geliebter",...]]
```
#NEW
==Reuse of grammars as libraries==
The idea of resource grammars: take care of all aspects of
surface grammaticality (inflection, agreement, word order).
Reuse in application grammar: via translations
```
cat C ---> oper C : Type = T
lincat C = T
fun f : A ---> oper f : A* = t
lin f = t
```
The user only needs to know the type signatures (abstract syntax).
However, this does not quite guarantee grammaticality, because
different categories can have the same lincat:
```
lincat Conj = {s : Str}
lincat Adv = {s : Str}
```
Thus someone may by accident use "and" as an adverb!
#NEW
==Forcing the type checker to act as a grammar checker==
We just have to make linearization types unique for each category.
The technique is reminiscent of Haskell's ``newtype`` but uses
records instead: we add **lock fields** e.g.
```
lincat Conj = {s : Str ; lock_Conj : {}}
lincat Adv = {s : Str ; lock_Adv : {}}
```
Thanks to record subtyping, the translation is simple:
```
fun f : C1 -> ... -> Cn -> C
lin f = t
--->
oper f : C1* -> ... -> Cn* -> C* =
\x1,...,xn -> (t x1 ... xn) ** {lock_C = {}}
```
#NEW
==Things to do==
Better compression of gfc file format.
Type checking of dependent-type pattern matching in abstract syntax.
Compilation-related modules that need rewriting
- ``ReadFiles``: clarify the logic of dependencies
- ``Compile``: clarify the logic of what to do with each module
- ``Compute``: make the evaluation more efficient
- ``Parsing/*``, ``OldParsing/*``, ``Conversion/*``: reduce the number
of parser formats and algorithms

View File

@@ -1,79 +0,0 @@
graph{
size = "7,7" ;
overlap = scale ;
"Abs" [label = "Abstract Syntax", style = "solid", shape = "rectangle"] ;
"1" [label = "Bulgarian", style = "solid", shape = "ellipse", color = "green"] ;
"1" -- "Abs" [style = "solid"];
"2" [label = "Czech", style = "solid", shape = "ellipse", color = "red"] ;
"2" -- "Abs" [style = "solid"];
"3" [label = "Danish", style = "solid", shape = "ellipse", color = "green"] ;
"3" -- "Abs" [style = "solid"];
"4" [label = "German", style = "solid", shape = "ellipse", color = "green"] ;
"4" -- "Abs" [style = "solid"];
"5" [label = "Estonian", style = "solid", shape = "ellipse", color = "red"] ;
"5" -- "Abs" [style = "solid"];
"6" [label = "Greek", style = "solid", shape = "ellipse", color = "red"] ;
"6" -- "Abs" [style = "solid"];
"7" [label = "English", style = "solid", shape = "ellipse", color = "green"] ;
"7" -- "Abs" [style = "solid"];
"8" [label = "Spanish", style = "solid", shape = "ellipse", color = "green"] ;
"8" -- "Abs" [style = "solid"];
"9" [label = "French", style = "solid", shape = "ellipse", color = "green"] ;
"9" -- "Abs" [style = "solid"];
"10" [label = "Italian", style = "solid", shape = "ellipse", color = "green"] ;
"10" -- "Abs" [style = "solid"];
"11" [label = "Latvian", style = "solid", shape = "ellipse", color = "red"] ;
"11" -- "Abs" [style = "solid"];
"12" [label = "Lithuanian", style = "solid", shape = "ellipse", color = "red"] ;
"Abs" -- "12" [style = "solid"];
"13" [label = "Irish", style = "solid", shape = "ellipse", color = "red"] ;
"Abs" -- "13" [style = "solid"];
"14" [label = "Hungarian", style = "solid", shape = "ellipse", color = "red"] ;
"Abs" -- "14" [style = "solid"];
"15" [label = "Maltese", style = "solid", shape = "ellipse", color = "red"] ;
"Abs" -- "15" [style = "solid"];
"16" [label = "Dutch", style = "solid", shape = "ellipse", color = "red"] ;
"Abs" -- "16" [style = "solid"];
"17" [label = "Polish", style = "solid", shape = "ellipse", color = "red"] ;
"Abs" -- "17" [style = "solid"];
"18" [label = "Portuguese", style = "solid", shape = "ellipse", color = "red"] ;
"Abs" -- "18" [style = "solid"];
"19" [label = "Slovak", style = "solid", shape = "ellipse", color = "red"] ;
"Abs" -- "19" [style = "solid"];
"20" [label = "Slovene", style = "solid", shape = "ellipse", color = "red"] ;
"Abs" -- "20" [style = "solid"];
"21" [label = "Romanian", style = "solid", shape = "ellipse", color = "red"] ;
"Abs" -- "21" [style = "solid"];
"22" [label = "Finnish", style = "solid", shape = "ellipse", color = "green"] ;
"Abs" -- "22" [style = "solid"];
"23" [label = "Swedish", style = "solid", shape = "ellipse", color = "green"] ;
"Abs" -- "23" [style = "solid"];
}

Binary file not shown.

Before

Width:  |  Height:  |  Size: 84 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 22 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 22 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 31 KiB

View File

@@ -1,88 +0,0 @@
digraph {
gfe [label = "file.gfe", style = "dashed", shape = "ellipse"];
gfe -> gf1 [label = " MkConcrete", style = "dashed"];
gf1 [label = "file.gf", style = "solid", shape = "ellipse"];
gf1 -> gf2 [label = " LexGF", style = "solid"];
gf2 [label = "token list", style = "solid", shape = "plaintext"];
gf2 -> gf3 [label = " ParGF", style = "solid"];
gf3 [label = "source tree", style = "solid", shape = "plaintext"];
gf3 -> gf4 [label = " SourceToGrammar", style = "solid"];
cf [label = "file.cf", style = "dashed", shape = "ellipse"];
cf -> gf4 [label = " CF.PPrCF", style = "dashed"];
ebnf [label = "file.ebnf", style = "dashed", shape = "ellipse"];
ebnf -> gf4 [label = " CF.EBNF", style = "dashed"];
gf4 [label = "GF tree", style = "solid", shape = "plaintext"];
gf4 -> gf5 [label = " Extend", style = "solid"];
gf5 [label = "inheritance-linked GF tree", style = "solid", shape = "plaintext"];
gf5 -> gf6 [label = " Rename", style = "solid"];
gf6 [label = "name-resolved GF tree", style = "solid", shape = "plaintext"];
gf6 -> gf7 [label = " CheckGrammar", style = "solid"];
gf7 [label = "type-annotated GF tree", style = "solid", shape = "plaintext"];
gf7 -> gf8 [label = " Optimize", style = "solid"];
gf8 [label = "optimized GF tree", style = "solid", shape = "plaintext"];
gf8 -> gf9 [label = " GrammarToCanon", style = "solid"];
gf9 [label = "GFC tree", style = "solid", shape = "plaintext"];
gf9 -> gfc [label = " BackOpt", style = "solid"];
gfc [label = "optimized GFC tree", style = "solid", shape = "box"];
gfc -> gf11 [label = " PrintGFC", style = "solid"];
gf11 [label = "file.gfc", style = "solid", shape = "ellipse"];
gfcc [label = "file.gfcc", style = "solid", shape = "ellipse"];
gfc -> gfcc [label = " CanonToGFCC", style = "solid"];
mcfg [label = "file.gfcm", style = "dashed", shape = "ellipse"];
gfc -> mcfg [label = " PrintGFC", style = "dashed"];
bnf [label = "file.cf", style = "dashed", shape = "ellipse"];
gfc -> bnf [label = " CF.PrLBNF", style = "dashed"];
happy [label = "file.y (Happy)", style = "dashed", shape = "ellipse"];
bnf -> happy [label = " bnfc", style = "dashed"];
bison [label = "file.y (Bison)", style = "dashed", shape = "ellipse"];
bnf -> bison [label = " bnfc", style = "dashed"];
cup [label = "parser.java (CUP)", style = "dashed", shape = "ellipse"];
bnf -> cup [label = " bnfc", style = "dashed"];
xml [label = "file.dtd (XML)", style = "dashed", shape = "ellipse"];
bnf -> xml [label = " bnfc", style = "dashed"];
cfg [label = "CFG tree", style = "solid", shape = "plaintext"];
gfc -> cfg [label = " Conversions.GFC", style = "dashed"];
cfgm [label = "file.cfgm", style = "dashed", shape = "ellipse"];
cfg -> cfgm [label = " Conversions.GFC", style = "dashed"];
srg [label = "Non-LR CFG", style = "solid", shape = "plaintext"];
cfg -> srg [label = " Speech.SRG", style = "dashed"];
gsl [label = "file.gsl", style = "dashed", shape = "ellipse"];
srg -> gsl [label = " Speech.PrGSL", style = "dashed"];
jsgf [label = "file.jsgf", style = "dashed", shape = "ellipse"];
srg -> jsgf [label = " Speech.PrJSGF", style = "dashed"];
fa [label = "DFA", style = "solid", shape = "plaintext"];
cfg -> fa [label = " Speech.CFGToFiniteState", style = "dashed"];
slf [label = "file.slf", style = "dashed", shape = "ellipse"];
fa -> slf [label = " Speech.PrSLF", style = "dashed"];
}

Binary file not shown.

Before

Width:  |  Height:  |  Size: 27 KiB

View File

@@ -1,350 +0,0 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
<HTML>
<HEAD>
<META NAME="generator" CONTENT="http://txt2tags.sf.net">
<TITLE>A Birds-Eye View of GF as a Grammar Formalism</TITLE>
</HEAD><BODY BGCOLOR="white" TEXT="black">
<P ALIGN="center"><CENTER><H1>A Birds-Eye View of GF as a Grammar Formalism</H1>
<FONT SIZE="4">
<I>Author: Aarne Ranta</I><BR>
Last update: Thu Feb 2 14:16:01 2006
</FONT></CENTER>
<P></P>
<HR NOSHADE SIZE=1>
<P></P>
<UL>
<LI><A HREF="#toc1">GF in a few words</A>
<LI><A HREF="#toc2">History of GF</A>
<LI><A HREF="#toc3">Some key ingredients of GF in other grammar formalisms</A>
<LI><A HREF="#toc4">Examples of descriptions in each formalism</A>
<LI><A HREF="#toc5">Lambda terms and records</A>
<LI><A HREF="#toc6">The structure of GF formalisms</A>
<LI><A HREF="#toc7">The expressivity of GF</A>
<LI><A HREF="#toc8">Grammars and parsing</A>
<LI><A HREF="#toc9">Grammars as software libraries</A>
<LI><A HREF="#toc10">Multilinguality</A>
<LI><A HREF="#toc11">Parametrized modules</A>
</UL>
<P></P>
<HR NOSHADE SIZE=1>
<P></P>
<P>
<IMG ALIGN="middle" SRC="Logos/gf0.png" BORDER="0" ALT="">
</P>
<P>
<I>Abstract. This document gives a general description of the</I>
<I>Grammatical Framework (GF), with comparisons to other grammar</I>
<I>formalisms such as CG, ACG, HPSG, and LFG.</I>
</P>
<P>
<!-- NEW -->
</P>
<A NAME="toc1"></A>
<H2>GF in a few words</H2>
<P>
Grammatical Framework (GF) is a grammar formalism
based on <B>constructive type theory</B>.
</P>
<P>
GF makes a distinction between <B>abstract syntax</B> and <B>concrete syntax</B>.
</P>
<P>
The abstract syntax part of GF is a <B>logical framework</B>, with
dependent types and higher-order functions.
</P>
<P>
The concrete syntax is a system of <B>records</B> containing strings and features.
</P>
<P>
A GF grammar defines a <B>reversible homomorphism</B> from an abstract syntax to a
concrete syntax.
</P>
<P>
A <B>multilingual GF grammar</B> is a set of concrete syntaxes associated with
one abstract syntax.
</P>
<P>
GF grammars are written in a high-level <B>functional programming language</B>,
which is compiled into a <B>core language</B> (GFC).
</P>
<P>
GF grammars can be used as <B>resources</B>, i.e. as libraries for writing
new grammars; these are compiled and optimized by the method of
<B>grammar composition</B>.
</P>
<P>
GF has a <B>module system</B> that supports grammar engineering and separate
compilation.
</P>
<P>
<!-- NEW -->
</P>
<A NAME="toc2"></A>
<H2>History of GF</H2>
<P>
1988. Intuitionistic Categorial Grammar; type theory as abstract syntax,
playing the role of Montague's analysis trees. Grammars implemented in Prolog.
</P>
<P>
1994. Type-Theoretical Grammar. Abstract syntax organized as a system of
combinators. Grammars implemented in ALF.
</P>
<P>
1996. Multilingual Type-Theoretical Grammar. Rules for generating six
languages from the same abstract syntax. Grammars implemented in ALF, ML, and
Haskell.
</P>
<P>
1998. The first implementation of GF as a language of its own.
</P>
<P>
2000. New version of GF: high-level functional source language, records used
for concrete syntax.
</P>
<P>
2003. The module system.
</P>
<P>
2004. Ljunglöf's thesis <I>Expressivity and Complexity of GF</I>.
</P>
<P>
<!-- NEW -->
</P>
<A NAME="toc3"></A>
<H2>Some key ingredients of GF in other grammar formalisms</H2>
<UL>
<LI>[GF ]: Grammatical Framework
<LI>[CG ]: categorial grammar
<LI>[ACG ]: abstract categorial grammar
<LI>[HPSG ]: head-driven phrase structure grammar
<LI>[LFG ]: lexical functional grammar
</UL>
<TABLE CELLPADDING="4" BORDER="1">
<TR>
<TD ALIGN="center">/</TD>
<TD>GF</TD>
<TD>ACG</TD>
<TD>LFG</TD>
<TD>HPSG</TD>
<TD>CG</TD>
</TR>
<TR>
<TD>abstract vs concrete syntax</TD>
<TD>X</TD>
<TD>X</TD>
<TD>?</TD>
<TD>-</TD>
<TD>-</TD>
</TR>
<TR>
<TD>type theory</TD>
<TD>X</TD>
<TD>X</TD>
<TD>-</TD>
<TD>-</TD>
<TD>X</TD>
</TR>
<TR>
<TD>records and features</TD>
<TD>X</TD>
<TD>-</TD>
<TD>X</TD>
<TD>X</TD>
<TD>-</TD>
</TR>
</TABLE>
<P></P>
<P>
<!-- NEW -->
</P>
<A NAME="toc4"></A>
<H2>Examples of descriptions in each formalism</H2>
<P>
To be written...
</P>
<P>
<!-- NEW -->
</P>
<A NAME="toc5"></A>
<H2>Lambda terms and records</H2>
<P>
In CS, abstract syntax is trees and concrete syntax is strings.
This works more or less for programming languages.
</P>
<P>
In CG, all syntax is lambda terms.
</P>
<P>
In Montague grammar, abstract syntax is lambda terms and
concrete syntax is trees. Abstract syntax as lambda terms
can be considered well-established.
</P>
<P>
In PATR and HPSG, concrete syntax is records. This can be considered
well-established for natural languages.
</P>
<P>
In ACG, both are lambda terms. This is more general than GF,
but reversibility requires linearity restriction, which can be
unnatural for grammar writing.
</P>
<P>
In GF, linearization from lambda terms to records is reversible,
and grammar writing is not restricted to linear terms.
</P>
<P>
Grammar composition in ACG is just function composition. In GF,
it is more restricted...
</P>
<P>
<!-- NEW -->
</P>
<A NAME="toc6"></A>
<H2>The structure of GF formalisms</H2>
<P>
The following diagram (to be drawn properly!) describes the
levels.
</P>
<PRE>
| programming language design
V
GF source language
|
| type-directed partial evaluation
V
GFC assembly language
|
| Ljunglöf's translation
V
MCFG parser
</PRE>
<P>
The last two phases are nontrivial mathematical properties.
</P>
<P>
In most grammar formalisms, grammarians have to work on the GFC
(or MCFG) level.
</P>
<P>
Maybe they use macros - they are therefore like macro assemblers. But there
are no separately compiled library modules, no type checking, etc.
</P>
<P>
<!-- NEW -->
</P>
<A NAME="toc7"></A>
<H2>The expressivity of GF</H2>
<P>
Parsing complexity is the same as MCFG: polynomial, with
unrestricted exponent depending on grammar.
This is between TAG and HPSG.
</P>
<P>
If semantic well-formedness (type theory) is taken into account,
then arbitrary logic can be expressed. The well-formedness of
abstract syntax is decidable, but the well-formedness of a
concrete-syntax string can require an arbitrary proof construction
and is therefore undecidable.
</P>
<P>
Separability between AS and CS: like TAG (Tree Adjoining Grammar), GF
has the goal of assigning intended trees for strings. This is
generalized to shared trees for different languages.
</P>
<P>
The high-level language strives after the properties of
writability and readability (programming language notions).
</P>
<P>
<!-- NEW -->
</P>
<A NAME="toc8"></A>
<H2>Grammars and parsing</H2>
<P>
In many projects, a grammar is just seen as a <B>declarative parsing program</B>.
</P>
<P>
For GF, a grammar is primarily the <B>definition of a language</B>.
</P>
<P>
Detaching grammars from parsers is a good idea, giving
</P>
<UL>
<LI>more efficient and robust parsing (statistical etc)
<LI>cleaner grammars
</UL>
<P>
Separating abstract from concrete syntax is a prerequisite for this:
we want parsers to return abstract syntax objects, and these must exist
independently of parse trees.
</P>
<P>
A possible radical approach to parsing:
use a grammar to generate a treebank and machine-learn
a statistical parser from this.
</P>
<P>
Comparison: Steedman in CCG has done something like this.
</P>
<P>
<!-- NEW -->
</P>
<A NAME="toc9"></A>
<H2>Grammars as software libraries</H2>
<P>
Reuse for different purposes.
</P>
<P>
Grammar composition.
</P>
<P>
<!-- NEW -->
</P>
<A NAME="toc10"></A>
<H2>Multilinguality</H2>
<P>
In <B>application grammars</B>, the AS is a semantic
model, and a CS covers domain terminology and idioms.
</P>
<P>
This can give publication-quality translation on
limited domains (e.g. the WebALT project).
</P>
<P>
Resource grammars with grammar composition lead to
<B>compile-time transfer</B>.
</P>
<P>
When is <B>run-time transfer</B> necessary?
</P>
<P>
Cf. CLE (Core Language Engine).
</P>
<P>
<!-- NEW -->
</P>
<A NAME="toc11"></A>
<H2>Parametrized modules</H2>
<P>
This notion comes from the ML language in the 1980's.
</P>
<P>
It can be used for sharing even more code between languages
than their AS.
</P>
<P>
Especially, for related languages (Scandinavian, Romance).
</P>
<P>
Cf. grammar porting in CLE: what they do with untyped
macro packages GF does with typable interfaces.
</P>
<!-- html code generated by txt2tags 2.0 (http://txt2tags.sf.net) -->
<!-- cmdline: txt2tags -thtml -\-toc gf-formalism.txt -->
</BODY></HTML>

View File

@@ -1,279 +0,0 @@
A Birds-Eye View of GF as a Grammar Formalism
Author: Aarne Ranta
Last update: %%date(%c)
% NOTE: this is a txt2tags file.
% Create an html file from this file using:
% txt2tags -thtml --toc gf-formalism.txt
%!target:html
%!postproc(html): #NEW <!-- NEW -->
[Logos/gf0.png]
//Abstract. This document gives a general description of the//
//Grammatical Framework (GF), with comparisons to other grammar//
//formalisms such as CG, ACG, HPSG, and LFG.//
#NEW
==Logical Frameworks and Grammar Formalisms==
Logic - formalization of mathematics (mathematical language?)
Linguistics - formalization of natural language
Since math lang is a subset, we can expect similarities.
But in natural language we have
- masses of empirical data
- no right of reform
#NEW
==High-level programming==
We have to write a lot of program code when formalizing language.
We need a language with proper abstractions.
Cf. Paul Graham on Prolog: very high-level, but wrong abstractions.
Typed functional languages work well in maths.
We have developed one for linguistics
- some extra constructs, e.g. inflection tables
- constraint of reversibility (nontrivial math problem)
Writing a grammar of e.g. French clitics should not be a topic
on which one can write a paper - it should be easy to render in code
the known facts about languages!
#NEW
==GF in a few words==
Grammatical Framework (GF) is a grammar formalism
based on **constructive type theory**.
GF makes a distinction between **abstract syntax** and **concrete syntax**.
The abstract syntax part of GF is a **logical framework**, with
dependent types and higher-order functions.
The concrete syntax is a system of **records** containing strings and features.
A GF grammar defines a **reversible homomorphism** from an abstract syntax to a
concrete syntax.
A **multilingual GF grammar** is a set of concrete syntaxes associated with
one abstract syntax.
GF grammars are written in a high-level **functional programming language**,
which is compiled into a **core language** (GFC).
GF grammars can be used as **resources**, i.e. as libraries for writing
new grammars; these are compiled and optimized by the method of
**grammar composition**.
GF has a **module system** that supports grammar engineering and separate
compilation.
#NEW
==History of GF==
1988. Intuitionistic Categorial Grammar; type theory as abstract syntax,
playing the role of Montague's analysis trees. Grammars implemented in Prolog.
1994. Type-Theoretical Grammar. Abstract syntax organized as a system of
combinators. Grammars implemented in ALF.
1996. Multilingual Type-Theoretical Grammar. Rules for generating six
languages from the same abstract syntax. Grammars implemented in ALF, ML, and
Haskell.
1998. The first implementation of GF as a language of its own.
2000. New version of GF: high-level functional source language, records used
for concrete syntax.
2003. The module system.
2004. Ljunglöf's thesis //Expressivity and Complexity of GF//.
#NEW
==Some key ingredients of GF in other grammar formalisms==
- [GF ]: Grammatical Framework
- [CG ]: categorial grammar
- [ACG ]: abstract categorial grammar
- [HPSG ]: head-driven phrase structure grammar
- [LFG ]: lexical functional grammar
| / | GF | ACG | LFG | HPSG | CG |
| abstract vs concrete syntax | X | X | ? | - | - |
| type theory | X | X | - | - | X |
| records and features | X | - | X | X | - |
#NEW
==Examples of descriptions in each formalism==
To be written...
#NEW
==Lambda terms and records==
In CS, abstract syntax is trees and concrete syntax is strings.
This works more or less for programming languages.
In CG, all syntax is lambda terms.
In Montague grammar, abstract syntax is lambda terms and
concrete syntax is trees. Abstract syntax as lambda terms
can be considered well-established.
In PATR and HPSG, concrete syntax is records. This can be considered
well-established for natural languages.
In ACG, both are lambda terms. This is more general than GF,
but reversibility requires linearity restriction, which can be
unnatural for grammar writing.
In GF, linearization from lambda terms to records is reversible,
and grammar writing is not restricted to linear terms.
Grammar composition in ACG is just function composition. In GF,
it is more restricted...
#NEW
==The structure of GF formalisms==
The following diagram (to be drawn properly!) describes the
levels.
```
| programming language design
V
GF source language
|
| type-directed partial evaluation
V
GFC assembly language
|
| Ljunglöf's translation
V
MCFG parser
```
The last two phases rest on nontrivial mathematical properties.
In most grammar formalisms, grammarians have to work on the GFC
(or MCFG) level.
Maybe they use macros - they are therefore like macro assemblers. But there
are no separately compiled library modules, no type checking, etc.
#NEW
==The expressivity of GF==
Parsing complexity is the same as MCFG: polynomial, with
unrestricted exponent depending on grammar.
This is between TAG and HPSG.
If semantic well-formedness (type theory) is taken into account,
then arbitrary logic can be expressed. The well-formedness of
abstract syntax is decidable, but the well-formedness of a
concrete-syntax string can require an arbitrary proof construction
and is therefore undecidable.
Separability between AS and CS: like TAG (Tree Adjoining Grammar), GF
has the goal of assigning intended trees for strings. This is
generalized to shared trees for different languages.
The high-level language strives after the properties of
writability and readability (programming language notions).
#NEW
==Grammars and parsing==
In many projects, a grammar is just seen as a **declarative parsing program**.
For GF, a grammar is primarily the **definition of a language**.
Detaching grammars from parsers is a good idea, giving
- more efficient and robust parsing (statistical etc)
- cleaner grammars
Separating abstract from concrete syntax is a prerequisite for this:
we want parsers to return abstract syntax objects, and these must exist
independently of parse trees.
A possible radical approach to parsing:
use a grammar to generate a treebank and machine-learn
a statistical parser from this.
Comparison: Steedman in CCG has done something like this.
#NEW
==Grammars as software libraries==
Reuse for different purposes.
Grammar composition.
#NEW
==Multilinguality==
In **application grammars**, the AS is a semantic
model, and a CS covers domain terminology and idioms.
This can give publication-quality translation on
limited domains (e.g. the WebALT project).
Resource grammars with grammar composition lead to
**compile-time transfer**.
When is **run-time transfer** necessary?
Cf. CLE (Core Language Engine).
#NEW
==Parametrized modules==
This notion comes from the ML language in the 1980's.
It can be used for sharing even more code between languages
than their AS.
Especially, for related languages (Scandinavian, Romance).
Cf. grammar porting in CLE: what they do with untyped
macro packages GF does with typable interfaces.

View File

@@ -1,311 +0,0 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
<HTML>
<HEAD>
<META NAME="generator" CONTENT="http://txt2tags.sf.net">
<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=iso-8859-1">
<TITLE>GF Project Ideas</TITLE>
</HEAD><BODY BGCOLOR="white" TEXT="black">
<P>
<center>
<IMG ALIGN="middle" SRC="Logos/gf0.png" BORDER="0" ALT="">
</center>
</P>
<P ALIGN="center"><CENTER>
<H1>GF Project Ideas</H1>
<FONT SIZE="4">
<I>Resource Grammars, Web Applications, etc</I><BR>
contact: Aarne Ranta (aarne at chalmers dot se)
</FONT></CENTER>
<P></P>
<HR NOSHADE SIZE=1>
<P></P>
<UL>
<LI><A HREF="#toc1">Resource Grammar Implementations</A>
<UL>
<LI><A HREF="#toc2">Tasks</A>
<LI><A HREF="#toc3">Who is qualified</A>
<LI><A HREF="#toc4">The Summer School</A>
</UL>
<LI><A HREF="#toc5">Other project ideas</A>
<UL>
<LI><A HREF="#toc6">GF interpreter in Java</A>
<LI><A HREF="#toc7">GF interpreter in C#</A>
<LI><A HREF="#toc8">GF localization library</A>
<LI><A HREF="#toc9">Multilingual grammar applications for mobile phones</A>
<LI><A HREF="#toc10">Multilingual grammar applications for the web</A>
<LI><A HREF="#toc11">GMail gadget for GF</A>
</UL>
<LI><A HREF="#toc12">Dissemination and intellectual property</A>
</UL>
<P></P>
<HR NOSHADE SIZE=1>
<P></P>
<A NAME="toc1"></A>
<H2>Resource Grammar Implementations</H2>
<P>
GF Resource Grammar Library is an open-source computational grammar resource
that currently covers 12 languages.
The Library is a collaborative effort to which programmers from many countries
have contributed. The next goal is to extend the library
to all of the 23 official EU languages. Also other languages
are welcome all the time. The following diagram shows the current status of the
library. Each of the red and yellow ones is a potential project.
</P>
<P>
<center>
<IMG ALIGN="middle" SRC="school-langs.png" BORDER="0" ALT="">
</center>
</P>
<P>
<I>red=wanted, green=exists, orange=in-progress, solid=official-eu, dotted=non-eu</I>
</P>
<P>
The linguistic coverage of the library includes the inflectional morphology
and basic syntax of each language. It can be used in GF applications
and also ported to other formats. It can also be used for building other
linguistic resources, such as morphological lexica and parsers.
The library is licensed under LGPL.
</P>
<A NAME="toc2"></A>
<H3>Tasks</H3>
<P>
Writing a grammar for a language is usually easier if other languages
from the same family already have grammars. The colours have the same
meaning as in the diagram above; in addition, we use boldface for the
red, still unimplemented languages and italics for the
orange languages in progress. Thus, in particular, each of the languages
coloured red below is a possible programming project.
</P>
<P>
Baltic:
</P>
<UL>
<LI><font color="red"><b> Latvian </b></font>
<LI><font color="red"><b> Lithuanian </b></font>
</UL>
<P>
Celtic:
</P>
<UL>
<LI><font color="red"><b> Irish </b></font>
</UL>
<P>
Fenno-Ugric:
</P>
<UL>
<LI><font color="red"><b> Estonian </b></font>
<LI><font color="green" size="-1"> Finnish </font>
<LI><font color="red"><b> Hungarian </b></font>
</UL>
<P>
Germanic:
</P>
<UL>
<LI><font color="green" size="-1"> Danish </font>
<LI><font color="red"><b> Dutch </b></font>
<LI><font color="green" size="-1"> English </font>
<LI><font color="green" size="-1"> German </font>
<LI><font color="green" size="-1"> Norwegian </font>
<LI><font color="green" size="-1"> Swedish </font>
</UL>
<P>
Hellenic:
</P>
<UL>
<LI><font color="red"><b> Greek </b></font>
</UL>
<P>
Indo-Iranian:
</P>
<UL>
<LI><font color="orange"><i> Hindi </i></font>
<LI><font color="orange"><i> Urdu </i></font>
</UL>
<P>
Romance:
</P>
<UL>
<LI><font color="green" size="-1"> Catalan </font>
<LI><font color="green" size="-1"> French </font>
<LI><font color="green" size="-1"> Italian </font>
<LI><font color="red"><b> Portuguese </b></font>
<LI><font color="orange"><i> Romanian </i></font>
<LI><font color="green" size="-1"> Spanish </font>
</UL>
<P>
Semitic:
</P>
<UL>
<LI><font color="orange"><i> Arabic </i></font>
<LI><font color="red"><b> Maltese </b></font>
</UL>
<P>
Slavonic:
</P>
<UL>
<LI><font color="green" size="-1"> Bulgarian </font>
<LI><font color="red"><b> Czech </b></font>
<LI><font color="orange"><i> Polish </i></font>
<LI><font color="green" size="-1"> Russian </font>
<LI><font color="red"><b> Slovak </b></font>
<LI><font color="red"><b> Slovenian </b></font>
</UL>
<P>
Tai:
</P>
<UL>
<LI><font color="orange"><i> Thai </i></font>
</UL>
<P>
Turkic:
</P>
<UL>
<LI><font color="orange"><i> Turkish </i></font>
</UL>
<A NAME="toc3"></A>
<H3>Who is qualified</H3>
<P>
Writing a resource grammar implementation requires good general programming
skills, and a good explicit knowledge of the grammar of the target language.
A typical participant could be
</P>
<UL>
<LI>native or fluent speaker of the target language
<LI>interested in languages on the theoretical level, and preferably familiar
with many languages (to be able to think about them on an abstract level)
<LI>familiar with functional programming languages such as ML or Haskell
(GF itself is a language similar to these)
<LI>on Master's or PhD level in linguistics, computer science, or mathematics
</UL>
<P>
But it is the quality of the assignment that is assessed, not any formal
requirements. The "typical participant" was described to give an idea of
who is likely to succeed in this.
</P>
<A NAME="toc4"></A>
<H3>The Summer School</H3>
<P>
A Summer School on resource grammars and applications will
be organized at the campus of Chalmers University of Technology in Gothenburg,
Sweden, on 17-28 August 2009. It can be seen as a natural checkpoint in
a resource grammar project; the participants are assumed to learn GF before
the Summer School, but how far they have come in their projects may vary.
</P>
<P>
More information on the Summer School web page:
</P>
<P>
<A HREF="http://www.cs.chalmers.se/Cs/Research/Language-technology/GF/doc/gf-summerschool.html"><CODE>http://www.cs.chalmers.se/Cs/Research/Language-technology/GF/doc/gf-summerschool.html</CODE></A>
</P>
<A NAME="toc5"></A>
<H2>Other project ideas</H2>
<A NAME="toc6"></A>
<H3>GF interpreter in Java</H3>
<P>
The idea is to write a run-time system for GF grammars in Java. This enables
the use of <B>embedded grammars</B> in Java applications. This project is
a fresh-up of <A HREF="http://www.cs.chalmers.se/~bringert/gf/gf-java.html">earlier work</A>,
now using the new run-time format PGF and addressing a new parsing algorithm.
</P>
<P>
Requirements: Java, Haskell, basics of compilers and parsing algorithms.
</P>
<A NAME="toc7"></A>
<H3>GF interpreter in C#</H3>
<P>
The idea is to write a run-time system for GF grammars in C#. This enables
the use of <B>embedded grammars</B> in C# applications. This project is
similar to <A HREF="http://www.cs.chalmers.se/~bringert/gf/gf-java.html">earlier work</A>
on Java, now addressing C# and using the new run-time format PGF.
</P>
<P>
Requirements: C#, Haskell, basics of compilers and parsing algorithms.
</P>
<A NAME="toc8"></A>
<H3>GF localization library</H3>
<P>
This is an idea for a software localization library using GF grammars.
The library should replace strings by grammar rules, which can be conceived
as very smart templates always guaranteeing grammatically correct output.
The library should be based on the
<A HREF="http://www.cs.chalmers.se/Cs/Research/Language-technology/GF/lib/resource/doc/synopsis.html">GF Resource Grammar Library</A>, providing infrastructure
currently for 12 languages.
</P>
<P>
Requirements: GF, some natural languages, some localization platform
</P>
<A NAME="toc9"></A>
<H3>Multilingual grammar applications for mobile phones</H3>
<P>
GF grammars can be compiled into programs that can be run on different
platforms, such as web browsers and mobile phones. An example is a
<A HREF="http://www.cs.chalmers.se/Cs/Research/Language-technology/GF/demos/index-numbers.html">numeral translator</A> running on both these platforms.
</P>
<P>
The proposed project is rather open: find some cool applications of
the technology that are useful or entertaining for mobile phone users. A
part of the project is to investigate implementation issues such as making
the best use of the phone's resources. Possible applications have
something to do with translation; one suggestion is an sms editor/translator.
</P>
<P>
Requirements: GF, JavaScript, some phone application development tools
</P>
<A NAME="toc10"></A>
<H3>Multilingual grammar applications for the web</H3>
<P>
This project is rather open: find some cool applications of
the technology that are useful or entertaining on the web. Examples include
</P>
<UL>
<LI>translators: see <A HREF="http://tournesol.cs.chalmers.se:41296/translate">demo</A>
<LI>multilingual wikis: see <A HREF="http://csmisc14.cs.chalmers.se/~meza/restWiki/wiki.cgi">demo</A>
<LI>fridge magnets: see <A HREF="http://tournesol.cs.chalmers.se:41296/fridge">demo</A>
</UL>
<P>
Requirements: GF, JavaScript or Java and Google Web Toolkit, CGI
</P>
<A NAME="toc11"></A>
<H3>GMail gadget for GF</H3>
<P>
It is possible to add custom gadgets to GMail. If you are going to write
e-mail in a foreign language then you probably will need help from
a dictionary or you may want to check something in the grammar. GF provides
all resources that you may need but you have to think about how to
design a gadget that fits well in the GMail environment and what
functionality from GF you want to expose.
</P>
<P>
Requirements: GF, Google Web Toolkit
</P>
<A NAME="toc12"></A>
<H2>Dissemination and intellectual property</H2>
<P>
All code suggested here will be released under the LGPL just like
the current resource grammars and run-time GF libraries,
with the copyright held by respective authors.
</P>
<P>
As a rule, the code will be distributed via the GF web site.
</P>
<!-- html code generated by txt2tags 2.4 (http://txt2tags.sf.net) -->
<!-- cmdline: txt2tags -\-toc gf-ideas.txt -->
</BODY></HTML>

View File

@@ -1,231 +0,0 @@
GF Project Ideas
Resource Grammars, Web Applications, etc
contact: Aarne Ranta (aarne at chalmers dot se)
%!Encoding : iso-8859-1
%!target:html
%!postproc(html): #BECE <center>
%!postproc(html): #ENCE </center>
%!postproc(html): #GRAY <font color="green" size="-1">
%!postproc(html): #EGRAY </font>
%!postproc(html): #RED <font color="red"><b>
%!postproc(html): #YELLOW <font color="orange"><i>
%!postproc(html): #ERED </b></font>
%!postproc(html): #EYELLOW </i></font>
#BECE
[Logos/gf0.png]
#ENCE
==Resource Grammar Implementations==
GF Resource Grammar Library is an open-source computational grammar resource
that currently covers 12 languages.
The Library is a collaborative effort to which programmers from many countries
have contributed. The next goal is to extend the library
to all of the 23 official EU languages. Also other languages
are welcome all the time. The following diagram shows the current status of the
library. Each of the red and yellow ones is a potential project.
#BECE
[school-langs.png]
#ENCE
//red=wanted, green=exists, orange=in-progress, solid=official-eu, dotted=non-eu//
The linguistic coverage of the library includes the inflectional morphology
and basic syntax of each language. It can be used in GF applications
and also ported to other formats. It can also be used for building other
linguistic resources, such as morphological lexica and parsers.
The library is licensed under LGPL.
===Tasks===
Writing a grammar for a language is usually easier if other languages
from the same family already have grammars. The colours have the same
meaning as in the diagram above; in addition, we use boldface for the
red, still unimplemented languages and italics for the
orange languages in progress. Thus, in particular, each of the languages
coloured red below is a possible programming project.
Baltic:
- #RED Latvian #ERED
- #RED Lithuanian #ERED
Celtic:
- #RED Irish #ERED
Fenno-Ugric:
- #RED Estonian #ERED
- #GRAY Finnish #EGRAY
- #RED Hungarian #ERED
Germanic:
- #GRAY Danish #EGRAY
- #RED Dutch #ERED
- #GRAY English #EGRAY
- #GRAY German #EGRAY
- #GRAY Norwegian #EGRAY
- #GRAY Swedish #EGRAY
Hellenic:
- #RED Greek #ERED
Indo-Iranian:
- #YELLOW Hindi #EYELLOW
- #YELLOW Urdu #EYELLOW
Romance:
- #GRAY Catalan #EGRAY
- #GRAY French #EGRAY
- #GRAY Italian #EGRAY
- #RED Portuguese #ERED
- #YELLOW Romanian #EYELLOW
- #GRAY Spanish #EGRAY
Semitic:
- #YELLOW Arabic #EYELLOW
- #RED Maltese #ERED
Slavonic:
- #GRAY Bulgarian #EGRAY
- #RED Czech #ERED
- #YELLOW Polish #EYELLOW
- #GRAY Russian #EGRAY
- #RED Slovak #ERED
- #RED Slovenian #ERED
Tai:
- #YELLOW Thai #EYELLOW
Turkic:
- #YELLOW Turkish #EYELLOW
===Who is qualified===
Writing a resource grammar implementation requires good general programming
skills, and a good explicit knowledge of the grammar of the target language.
A typical participant could be
- native or fluent speaker of the target language
- interested in languages on the theoretical level, and preferably familiar
with many languages (to be able to think about them on an abstract level)
- familiar with functional programming languages such as ML or Haskell
(GF itself is a language similar to these)
- on Master's or PhD level in linguistics, computer science, or mathematics
But it is the quality of the assignment that is assessed, not any formal
requirements. The "typical participant" was described to give an idea of
who is likely to succeed in this.
===The Summer School===
A Summer School on resource grammars and applications will
be organized at the campus of Chalmers University of Technology in Gothenburg,
Sweden, on 17-28 August 2009. It can be seen as a natural checkpoint in
a resource grammar project; the participants are assumed to learn GF before
the Summer School, but how far they have come in their projects may vary.
More information on the Summer School web page:
[``http://www.cs.chalmers.se/Cs/Research/Language-technology/GF/doc/gf-summerschool.html`` http://www.cs.chalmers.se/Cs/Research/Language-technology/GF/doc/gf-summerschool.html]
==Other project ideas==
===GF interpreter in Java===
The idea is to write a run-time system for GF grammars in Java. This enables
the use of **embedded grammars** in Java applications. This project is
a fresh-up of [earlier work http://www.cs.chalmers.se/~bringert/gf/gf-java.html],
now using the new run-time format PGF and addressing a new parsing algorithm.
Requirements: Java, Haskell, basics of compilers and parsing algorithms.
===GF interpreter in C#===
The idea is to write a run-time system for GF grammars in C#. This enables
the use of **embedded grammars** in C# applications. This project is
similar to [earlier work http://www.cs.chalmers.se/~bringert/gf/gf-java.html]
on Java, now addressing C# and using the new run-time format PGF.
Requirements: C#, Haskell, basics of compilers and parsing algorithms.
===GF localization library===
This is an idea for a software localization library using GF grammars.
The library should replace strings by grammar rules, which can be conceived
as very smart templates always guaranteeing grammatically correct output.
The library should be based on the
[GF Resource Grammar Library http://www.cs.chalmers.se/Cs/Research/Language-technology/GF/lib/resource/doc/synopsis.html], providing infrastructure
currently for 12 languages.
Requirements: GF, some natural languages, some localization platform
===Multilingual grammar applications for mobile phones===
GF grammars can be compiled into programs that can be run on different
platforms, such as web browsers and mobile phones. An example is a
[numeral translator http://www.cs.chalmers.se/Cs/Research/Language-technology/GF/demos/index-numbers.html] running on both these platforms.
The proposed project is rather open: find some cool applications of
the technology that are useful or entertaining for mobile phone users. A
part of the project is to investigate implementation issues such as making
the best use of the phone's resources. Possible applications have
something to do with translation; one suggestion is an sms editor/translator.
Requirements: GF, JavaScript, some phone application development tools
===Multilingual grammar applications for the web===
This project is rather open: find some cool applications of
the technology that are useful or entertaining on the web. Examples include
- translators: see [demo http://129.16.250.57:41296/translate]
- multilingual wikis: see [demo http://csmisc14.cs.chalmers.se/~meza/restWiki/wiki.cgi]
- fridge magnets: see [demo http://129.16.250.57:41296/fridge]
Requirements: GF, JavaScript or Java and Google Web Toolkit, CGI
===GMail gadget for GF===
It is possible to add custom gadgets to GMail. If you are going to write
e-mail in a foreign language then you probably will need help from
a dictionary or you may want to check something in the grammar. GF provides
all resources that you may need but you have to think about how to
design a gadget that fits well in the GMail environment and what
functionality from GF you want to expose.
Requirements: GF, Google Web Toolkit
==Dissemination and intellectual property==
All code suggested here will be released under the LGPL just like
the current resource grammars and run-time GF libraries,
with the copyright held by respective authors.
As a rule, the code will be distributed via the GF web site.

View File

@@ -1,289 +0,0 @@
(Adapted from KeY statistics by Vladimir Klebanov)
This is GF right now:
Total Physical Source Lines of Code (SLOC) = 42,467
Development Effort Estimate, Person-Years (Person-Months) = 10.24 (122.932)
(Basic COCOMO model, Person-Months = 2.4 * (KSLOC**1.05))
Schedule Estimate, Years (Months) = 1.30 (15.56)
(Basic COCOMO model, Months = 2.5 * (person-months**0.38))
Estimated Average Number of Developers (Effort/Schedule) = 7.90
Total Estimated Cost to Develop = $ 1,383,870
(average salary = $56,286/year, overhead = 2.40).
SLOCCount, Copyright (C) 2001-2004 David A. Wheeler
----------- basis of counting: Haskell code + BNFC code - generated Happy parsers
-- GF/src% wc -l *.hs GF/*.hs GF/*/*.hs GF/*/*/*.hs GF/*/*.cf JavaGUI/*.java
-- date Fri Jun 3 10:00:31 CEST 2005
104 GF.hs
402 GF/API.hs
98 GF/GFModes.hs
379 GF/Shell.hs
4 GF/Today.hs
43 GF/API/BatchTranslate.hs
145 GF/API/GrammarToHaskell.hs
77 GF/API/IOGrammar.hs
25 GF/API/MyParser.hs
177 GF/Canon/AbsGFC.hs
37 GF/Canon/ByLine.hs
192 GF/Canon/CanonToGrammar.hs
293 GF/Canon/CMacros.hs
79 GF/Canon/GetGFC.hs
86 GF/Canon/GFC.hs
291 GF/Canon/LexGFC.hs
201 GF/Canon/Look.hs
235 GF/Canon/MkGFC.hs
46 GF/Canon/PrExp.hs
352 GF/Canon/PrintGFC.hs
147 GF/Canon/Share.hs
207 GF/Canon/SkelGFC.hs
46 GF/Canon/TestGFC.hs
49 GF/Canon/Unlex.hs
202 GF/CF/CanonToCF.hs
213 GF/CF/CF.hs
217 GF/CF/CFIdent.hs
62 GF/CF/CFtoGrammar.hs
47 GF/CF/CFtoSRG.hs
206 GF/CF/ChartParser.hs
191 GF/CF/EBNF.hs
45 GF/CFGM/AbsCFG.hs
312 GF/CFGM/LexCFG.hs
157 GF/CFGM/PrintCFG.hs
109 GF/CFGM/PrintCFGrammar.hs
85 GF/CF/PPrCF.hs
150 GF/CF/PrLBNF.hs
106 GF/CF/Profile.hs
141 GF/Compile/BackOpt.hs
763 GF/Compile/CheckGrammar.hs
337 GF/Compile/Compile.hs
136 GF/Compile/Extend.hs
124 GF/Compile/GetGrammar.hs
282 GF/Compile/GrammarToCanon.hs
93 GF/Compile/MkConcrete.hs
128 GF/Compile/MkResource.hs
83 GF/Compile/MkUnion.hs
146 GF/Compile/ModDeps.hs
294 GF/Compile/NewRename.hs
227 GF/Compile/Optimize.hs
76 GF/Compile/PGrammar.hs
84 GF/Compile/PrOld.hs
119 GF/Compile/Rebuild.hs
63 GF/Compile/RemoveLiT.hs
274 GF/Compile/Rename.hs
535 GF/Compile/ShellState.hs
135 GF/Compile/Update.hs
129 GF/Conversion/GFC.hs
149 GF/Conversion/GFCtoSimple.hs
53 GF/Conversion/MCFGtoCFG.hs
46 GF/Conversion/RemoveEpsilon.hs
102 GF/Conversion/RemoveErasing.hs
82 GF/Conversion/RemoveSingletons.hs
137 GF/Conversion/SimpleToFinite.hs
26 GF/Conversion/SimpleToMCFG.hs
230 GF/Conversion/Types.hs
143 GF/Data/Assoc.hs
118 GF/Data/BacktrackM.hs
20 GF/Data/ErrM.hs
119 GF/Data/GeneralDeduction.hs
30 GF/Data/Glue.hs
67 GF/Data/IncrementalDeduction.hs
61 GF/Data/Map.hs
662 GF/Data/Operations.hs
127 GF/Data/OrdMap2.hs
120 GF/Data/OrdSet.hs
193 GF/Data/Parsers.hs
64 GF/Data/RedBlack.hs
150 GF/Data/RedBlackSet.hs
19 GF/Data/SharedString.hs
127 GF/Data/SortedList.hs
134 GF/Data/Str.hs
120 GF/Data/Trie2.hs
129 GF/Data/Trie.hs
71 GF/Data/Utilities.hs
243 GF/Data/Zipper.hs
78 GF/Embed/EmbedAPI.hs
113 GF/Embed/EmbedCustom.hs
137 GF/Embed/EmbedParsing.hs
50 GF/Formalism/CFG.hs
51 GF/Formalism/GCFG.hs
58 GF/Formalism/MCFG.hs
246 GF/Formalism/SimpleGFC.hs
349 GF/Formalism/Utilities.hs
30 GF/Fudgets/ArchEdit.hs
134 GF/Fudgets/CommandF.hs
51 GF/Fudgets/EventF.hs
59 GF/Fudgets/FudgetOps.hs
37 GF/Fudgets/UnicodeF.hs
86 GF/Grammar/AbsCompute.hs
38 GF/Grammar/Abstract.hs
149 GF/Grammar/AppPredefined.hs
312 GF/Grammar/Compute.hs
215 GF/Grammar/Grammar.hs
46 GF/Grammar/Lockfield.hs
189 GF/Grammar/LookAbs.hs
182 GF/Grammar/Lookup.hs
745 GF/Grammar/Macros.hs
340 GF/Grammar/MMacros.hs
115 GF/Grammar/PatternMatch.hs
279 GF/Grammar/PrGrammar.hs
121 GF/Grammar/Refresh.hs
44 GF/Grammar/ReservedWords.hs
251 GF/Grammar/TC.hs
301 GF/Grammar/TypeCheck.hs
96 GF/Grammar/Unify.hs
101 GF/Grammar/Values.hs
89 GF/Infra/CheckM.hs
43 GF/Infra/Comments.hs
152 GF/Infra/Ident.hs
390 GF/Infra/Modules.hs
358 GF/Infra/Option.hs
179 GF/Infra/Print.hs
331 GF/Infra/ReadFiles.hs
337 GF/Infra/UseIO.hs
153 GF/OldParsing/CFGrammar.hs
283 GF/OldParsing/ConvertFiniteGFC.hs
121 GF/OldParsing/ConvertFiniteSimple.hs
34 GF/OldParsing/ConvertGFCtoMCFG.hs
122 GF/OldParsing/ConvertGFCtoSimple.hs
44 GF/OldParsing/ConvertGrammar.hs
52 GF/OldParsing/ConvertMCFGtoCFG.hs
30 GF/OldParsing/ConvertSimpleToMCFG.hs
43 GF/OldParsing/GCFG.hs
86 GF/OldParsing/GeneralChart.hs
148 GF/OldParsing/GrammarTypes.hs
50 GF/OldParsing/IncrementalChart.hs
206 GF/OldParsing/MCFGrammar.hs
43 GF/OldParsing/ParseCFG.hs
82 GF/OldParsing/ParseCF.hs
177 GF/OldParsing/ParseGFC.hs
37 GF/OldParsing/ParseMCFG.hs
161 GF/OldParsing/SimpleGFC.hs
188 GF/OldParsing/Utilities.hs
51 GF/Parsing/CFG.hs
66 GF/Parsing/CF.hs
151 GF/Parsing/GFC.hs
64 GF/Parsing/MCFG.hs
83 GF/Printing/PrintParser.hs
127 GF/Printing/PrintSimplifiedTerm.hs
190 GF/Shell/CommandL.hs
556 GF/Shell/Commands.hs
524 GF/Shell/HelpFile.hs
79 GF/Shell/JGF.hs
171 GF/Shell/PShell.hs
221 GF/Shell/ShellCommands.hs
66 GF/Shell/SubShell.hs
87 GF/Shell/TeachYourself.hs
296 GF/Source/AbsGF.hs
229 GF/Source/GrammarToSource.hs
312 GF/Source/LexGF.hs
528 GF/Source/PrintGF.hs
353 GF/Source/SkelGF.hs
657 GF/Source/SourceToGrammar.hs
58 GF/Source/TestGF.hs
72 GF/Speech/PrGSL.hs
65 GF/Speech/PrJSGF.hs
128 GF/Speech/SRG.hs
103 GF/Speech/TransformCFG.hs
30 GF/System/ArchEdit.hs
90 GF/System/Arch.hs
27 GF/System/NoReadline.hs
27 GF/System/Readline.hs
73 GF/System/Tracing.hs
25 GF/System/UseReadline.hs
63 GF/Text/Arabic.hs
97 GF/Text/Devanagari.hs
72 GF/Text/Ethiopic.hs
99 GF/Text/ExtendedArabic.hs
37 GF/Text/ExtraDiacritics.hs
172 GF/Text/Greek.hs
53 GF/Text/Hebrew.hs
95 GF/Text/Hiragana.hs
69 GF/Text/LatinASupplement.hs
47 GF/Text/OCSCyrillic.hs
45 GF/Text/Russian.hs
77 GF/Text/Tamil.hs
125 GF/Text/Text.hs
69 GF/Text/Unicode.hs
47 GF/Text/UTF8.hs
56 GF/Translate/GFT.hs
427 GF/UseGrammar/Custom.hs
435 GF/UseGrammar/Editing.hs
180 GF/UseGrammar/Generate.hs
71 GF/UseGrammar/GetTree.hs
143 GF/UseGrammar/Information.hs
228 GF/UseGrammar/Linear.hs
130 GF/UseGrammar/Morphology.hs
70 GF/UseGrammar/Paraphrases.hs
157 GF/UseGrammar/Parsing.hs
66 GF/UseGrammar/Randomized.hs
170 GF/UseGrammar/Session.hs
186 GF/UseGrammar/Tokenize.hs
43 GF/UseGrammar/Transfer.hs
122 GF/Visualization/NewVisualizationGrammar.hs
123 GF/Visualization/VisualizeGrammar.hs
63 GF/Conversion/SimpleToMCFG/Coercions.hs
256 GF/Conversion/SimpleToMCFG/Nondet.hs
129 GF/Conversion/SimpleToMCFG/Strict.hs
71 GF/OldParsing/ConvertGFCtoMCFG/Coercions.hs
281 GF/OldParsing/ConvertGFCtoMCFG/Nondet.hs
277 GF/OldParsing/ConvertGFCtoMCFG/Old.hs
189 GF/OldParsing/ConvertGFCtoMCFG/Strict.hs
70 GF/OldParsing/ConvertSimpleToMCFG/Coercions.hs
245 GF/OldParsing/ConvertSimpleToMCFG/Nondet.hs
277 GF/OldParsing/ConvertSimpleToMCFG/Old.hs
139 GF/OldParsing/ConvertSimpleToMCFG/Strict.hs
83 GF/OldParsing/ParseCFG/General.hs
142 GF/OldParsing/ParseCFG/Incremental.hs
156 GF/OldParsing/ParseMCFG/Basic.hs
103 GF/Parsing/CFG/General.hs
150 GF/Parsing/CFG/Incremental.hs
98 GF/Parsing/CFG/PInfo.hs
226 GF/Parsing/MCFG/Active2.hs
304 GF/Parsing/MCFG/Active.hs
144 GF/Parsing/MCFG/Incremental2.hs
163 GF/Parsing/MCFG/Incremental.hs
128 GF/Parsing/MCFG/Naive.hs
163 GF/Parsing/MCFG/PInfo.hs
194 GF/Parsing/MCFG/Range.hs
183 GF/Parsing/MCFG/ViaCFG.hs
167 GF/Canon/GFC.cf
36 GF/CFGM/CFG.cf
321 GF/Source/GF.cf
272 JavaGUI/DynamicTree2.java
272 JavaGUI/DynamicTree.java
2357 JavaGUI/GFEditor2.java
1420 JavaGUI/GFEditor.java
30 JavaGUI/GrammarFilter.java
13 JavaGUI/LinPosition.java
18 JavaGUI/MarkedArea.java
1552 JavaGUI/Numerals.java
22 JavaGUI/Utils.java
5956 total
48713 total
- 2131 GF/Canon/ParGFC.hs
3336 GF/Source/ParGF.hs
779 GF/CFGM/ParCFG.hs
42467 total
--------
-- Basic COCOMO estimates computed from a physical source-line count.
-- Returns, in order: [sloc, ksloc, effort in person-months,
-- effort in person-years, schedule in months, schedule in years,
-- average number of developers, estimated cost in USD].
sloccount sloc = [sloc, ksloc, effort, effort / 12, schedule, schedule / 12, develops, cost]
  where
    ksloc    = sloc / 1000                   -- thousands of source lines
    effort   = 2.4 * ksloc ** 1.05           -- Basic COCOMO effort, person-months
    schedule = 2.5 * effort ** 0.38          -- Basic COCOMO schedule, months
    develops = effort / schedule             -- average developers = effort / schedule
    cost     = 56286 * (effort / 12) * 2.4   -- salary * person-years * overhead factor

View File

@@ -1,533 +0,0 @@
GF Resource Grammar Summer School
Gothenburg, 17-28 August 2009
Aarne Ranta (aarne at chalmers.se)
%!Encoding : iso-8859-1
%!target:html
%!postproc(html): #BECE <center>
%!postproc(html): #ENCE </center>
%!postproc(html): #GRAY <font color="green" size="-1">
%!postproc(html): #EGRAY </font>
%!postproc(html): #RED <font color="red">
%!postproc(html): #YELLOW <font color="orange">
%!postproc(html): #ERED </font>
#BECE
[school-langs.png]
#ENCE
//red=wanted, green=exists, orange=in-progress, solid=official-eu, dotted=non-eu//
==News==
An on-line course //GF for Resource Grammar Writers// will start on
Monday 20 April at 15.30 CEST. The slides and recordings of the five
45-minute lectures will be made available via this web page. If requested,
the course may be repeated in the beginning of the summer school.
==Executive summary==
GF Resource Grammar Library is an open-source computational grammar resource
that currently covers 12 languages.
The Summer School is a part of a collaborative effort to extend the library
to all of the 23 official EU languages. Also other languages
chosen by the participants are welcome.
The missing EU languages are:
Czech, Dutch, Estonian, Greek, Hungarian, Irish, Latvian, Lithuanian,
Maltese, Portuguese, Slovak, and Slovenian. There is also more work to
be done on Polish and Romanian.
The linguistic coverage of the library includes the inflectional morphology
and basic syntax of each language. It can be used in GF applications
and also ported to other formats. It can also be used for building other
linguistic resources, such as morphological lexica and parsers.
The library is licensed under LGPL.
In the summer school, each language will be implemented by one or two students
working together. A morphology implementation will be credited
as a Chalmers course worth 7.5 ECTS points; adding a syntax implementation
will be worth more. The estimated total work load is 1-2 months for the
morphology, and 3-6 months for the whole grammar.
Participation in the course is free. Registration is done via the course's
Google group, [``groups.google.com/group/gf-resource-school-2009/`` http://groups.google.com/group/gf-resource-school-2009/]. The registration deadline is 15 June 2009.
Some travel grants will be available. They are distributed on the basis of a
GF programming contest in April and May.
The summer school will be held on 17-28 August 2009, at the campus of
Chalmers University of Technology in Gothenburg, Sweden.
[align6.png]
//Word alignment produced by GF from the resource grammar in Bulgarian, English, Italian, German, Finnish, French, and Swedish.//
==Introduction==
Since 2007, EU-27 has 23 official languages, listed in the diagram on top of this
document. There is a growing need of linguistic resources for these
languages, to help in tasks such as translation and information retrieval.
These resources should be **portable** and **freely accessible**.
Languages marked in red in the diagram are of particular interest for
the summer school, since they are those on which the effort will be concentrated.
GF (Grammatical Framework,
[``digitalgrammars.com/gf`` http://digitalgrammars.com/gf])
is a **functional programming language** designed for writing natural
language grammars. It provides an efficient platform for this task, due to
its modern characteristics:
- It is a functional programming language, similar to Haskell and ML.
- It has a static type system and type checker.
- It has a powerful module system supporting separate compilation
and data abstraction.
- It has an optimizing compiler to **Portable Grammar Format** (PGF).
- PGF can be further compiled to other formats, such as JavaScript and
speech recognition language models.
- GF has a **resource grammar library** giving access to the morphology and
basic syntax of 12 languages.
In addition to "ordinary" grammars for single languages, GF
supports **multilingual grammars**. A multilingual GF grammar consists of an
**abstract syntax** and a set of **concrete syntaxes**.
An abstract syntax is a system of **trees**, serving as a semantic
model or an ontology. A concrete syntax is a mapping from abstract syntax
trees to strings of a particular language.
These mappings defined in concrete syntax are **reversible**: they
can be used both for **generating** strings from trees, and for
**parsing** strings into trees. Combinations of generation and
parsing can be used for **translation**, where the abstract
syntax works as an **interlingua**. Thus GF has been used as a
framework for building translation systems in several areas
of application and large sets of languages.
==The GF resource grammar library==
The GF resource grammar library is a set of grammars usable as libraries when
building translation systems and other applications.
The library currently covers
the 9 languages coloured in green in the diagram above; in addition,
Catalan, Norwegian, and Russian are covered, and there is ongoing work on
Arabic, Hindi/Urdu, Polish, Romanian, and Thai.
The purpose of the resource grammar library is to define the "low-level" structure
of a language: inflection, word order, agreement. This structure belongs to what
linguists call morphology and syntax. It can be very complex and requires
a lot of knowledge. Yet, when translating from one language to
another, knowing morphology and syntax is but a part of what is needed.
The translator (whether human
or machine) must understand the meaning of what is translated, and must also know
the idiomatic way to express the meaning in the target language. This knowledge
can be very domain-dependent and requires in general an expert in the field to
reach high quality: a mathematician in the field of mathematics, a meteorologist
in the field of weather reports, etc.
The problem is to find a person who is an expert in both the domain of translation
and in the low-level linguistic details. It is the rareness of this combination
that has made it difficult to build interlingua-based translation systems.
The GF resource grammar library has the mission of helping in this task.
It encapsulates the low-level linguistics in program modules
accessed through easy-to-use interfaces.
Experts on different domains can build translation systems by using the library,
without knowing low-level linguistics. The idea is much the same as when a
programmer builds a graphical user interface (GUI) from high-level elements such as
buttons and menus, without having to care about pixels or geometrical forms.
===Missing EU languages, by the family===
Writing a grammar for a language is usually easier if other languages
from the same family already have grammars. The colours have the same
meaning as in the diagram above.
Baltic:
#RED Latvian #ERED
#RED Lithuanian #ERED
Celtic:
#RED Irish #ERED
Fenno-Ugric:
#RED Estonian #ERED
#GRAY Finnish #EGRAY
#RED Hungarian #ERED
Germanic:
#GRAY Danish #EGRAY
#RED Dutch #ERED
#GRAY English #EGRAY
#GRAY German #EGRAY
#GRAY Swedish #EGRAY
Hellenic:
#RED Greek #ERED
Romance:
#GRAY French #EGRAY
#GRAY Italian #EGRAY
#RED Portuguese #ERED
#YELLOW Romanian #ERED
#GRAY Spanish #EGRAY
Semitic:
#RED Maltese #ERED
Slavonic:
#GRAY Bulgarian #EGRAY
#RED Czech #ERED
#YELLOW Polish #ERED
#RED Slovak #ERED
#RED Slovenian #ERED
===Applications of the library===
In addition to translation, the library is also useful in **localization**,
that is, porting a piece of software to new languages.
The GF resource grammar library has been used in three major projects that need
interlingua-based translation or localization of systems to new languages:
- in KeY,
[``http://www.key-project.org/`` http://www.key-project.org/],
for writing formal and informal software specifications (3 languages)
- in WebALT,
[``http://webalt.math.helsinki.fi/content/index_eng.html`` http://webalt.math.helsinki.fi/content/index_eng.html],
for translating mathematical exercises to 7 languages
- in TALK [``http://www.talk-project.org`` http://www.talk-project.org],
where the library was used for localizing spoken dialogue systems
to six languages
The library is also a generic **linguistic resource**,
which can be used for tasks
such as language teaching and information retrieval. The liberal license (LGPL)
makes it usable for anyone and for any task. GF also has tools supporting the
use of grammars in programs written in other
programming languages: C, C++, Haskell,
Java, JavaScript, and Prolog. In connection with the TALK project,
support has also been
developed for translating GF grammars to language models used in speech
recognition (GSL/Nuance, HTK/ATK, SRGS, JSGF).
===The structure of the library===
The library has the following main parts:
- **Inflection paradigms**, covering the inflection of each language.
- **Core Syntax**, covering a large set of syntax rules that
can be implemented for all languages involved.
- **Common Test Lexicon**, giving ca. 500 common words that can be used for
testing the library.
- **Language-Specific Syntax Extensions**, covering syntax rules that are
not implementable for all languages.
- **Language-Specific Lexica**, word lists for each language, with
accurate morphological and syntactic information.
The goal of the summer school is to implement, for each language, at least
the first three components. The latter three are more open-ended in character.
==The summer school==
The goal of the summer school is to extend the GF resource grammar library
to covering all 23 EU languages, which means we need 15 new languages.
We also welcome other languages than these 23,
if there are interested participants.
The amount of work and skill is between a Master's thesis and a PhD thesis.
The Russian implementation was made by Janna Khegai as a part of her
PhD thesis; the thesis contains other material, too.
The Arabic implementation was started by Ali El Dada in his Master's thesis,
but the thesis does not cover the whole API. The realistic amount of work is
somewhere between 3 and 8 person months,
but this is very much language-dependent.
Dutch, for instance, can profit from previous implementations of German and
Scandinavian languages, and will probably require less work.
Latvian and Lithuanian are the first languages of the Baltic family and
will probably require more work.
In any case, the proposed allocation of work power is 2 participants per
language. They will do 1 months' worth of home work, followed
by 2 weeks of summer school, followed by 4 months work at home.
Who are these participants?
===Selecting participants===
Persons interested to participate in the Summer School should sign up in
the **Google Group** of the course,
[``groups.google.com/group/gf-resource-school-2009/`` http://groups.google.com/group/gf-resource-school-2009/]
The registration deadline is 15 June 2009.
Notice: you can sign up in the Google
group even if you are not planning to attend the summer school, but are
just interested in the topic. There will be a separate registration to the
school itself later.
The participants are recommended to learn GF in advance, by self-study from the
[tutorial http://digitalgrammars.com/gf/doc/gf-tutorial.html].
This should take a couple of weeks. An **on-line course** will be
arranged on 20-29 April to help in getting started with GF.
At the end of the on-line course, a **programming assignment** will be published.
This assignment will test skills required in resource grammar programming.
Work on the assignment will take a couple of weeks.
Those who are interested in getting a travel grant will submit
their sample resource grammar fragment
to the Summer School Committee by 12 May.
The Committee then decides who is given a travel grant of up to 1000 EUR.
Notice: you can participate in the summer school without following the on-line
course or participating in the contest. These things are required only if you
want a travel grant. If requested by enough participants, the lectures of
the on-line course will be repeated in the beginning of the summer school.
The summer school itself is devoted for working on resource grammars.
In addition to grammar writing itself, testing and evaluation is
performed. One way to do this is via adding new languages
to resource grammar applications - in particular, to the WebALT mathematical
exercise translator.
The resource grammars are expected to be completed by December 2009. They will
be published at GF website and licensed under LGPL.
The participants are encouraged to contact each other and even work in groups.
===Who is qualified===
Writing a resource grammar implementation requires good general programming
skills, and a good explicit knowledge of the grammar of the target language.
A typical participant could be
- native or fluent speaker of the target language
- interested in languages on the theoretical level, and preferably familiar
with many languages (to be able to think about them on an abstract level)
- familiar with functional programming languages such as ML or Haskell
(GF itself is a language similar to these)
- on Master's or PhD level in linguistics, computer science, or mathematics
But it is the quality of the assignment that is assessed, not any formal
requirements. The "typical participant" was described to give an idea of
who is likely to succeed in this.
===Costs===
The summer school is free of charge.
Some travel grants are given, on the basis of a programming contest,
to cover travel and accommodation costs up to 1000 EUR
per person.
The number of grants will be decided during Spring 2009, and the grant
holders will be notified before the beginning of June.
Special terms will apply to students in
[GSLT http://www.gslt.hum.gu.se/] and
[NGSLT http://ngslt.org/].
===Teachers===
A list of teachers will be published here later. Some of the local teachers
probably involved are the following:
- Krasimir Angelov
- Robin Cooper
- Håkan Burden
- Markus Forsberg
- Harald Hammarström
- Peter Ljunglöf
- Aarne Ranta
More teachers are welcome! If you are interested, please contact us so that
we can discuss your involvement and travel arrangements.
In addition to teachers, we will look for consultants who can help to assess
the results for each language. Please contact us!
===The Summer School Committee===
This committee consists of a number of teachers and informants,
who will select the participants. It will be selected by April 2009.
===Time and Place===
The summer school will
be organized at the campus of Chalmers University of Technology in Gothenburg,
Sweden, on 17-28 August 2009.
Time schedule:
- February: announcement of summer school
- 20-29 April: on-line course
- 12 May: submission deadline for assignment work
- 31 May: review of assignments, notifications of acceptance
- 15 June: **registration deadline**
- 17-28 August: Summer School
- September-December: homework on resource grammars
- December: release of the extended Resource Grammar Library
===Dissemination and intellectual property===
The new resource grammars will be released under the LGPL just like
the current resource grammars,
with the copyright held by respective authors.
The grammars will be distributed via the GF web site.
==Why I should participate==
Seven reasons:
+ participation in a pioneering language technology work in an
enthusiastic atmosphere
+ work and fun with people from all over Europe and the world
+ job opportunities and business ideas
+ credits: the school project will be established as a course at Chalmers worth
7.5 or 15 ECTS points per person, depending on the work accomplished; also
extensions to Master's thesis will be considered (special credit arrangements
for [GSLT http://www.gslt.hum.gu.se/] and [NGSLT http://ngslt.org/])
+ merits: the resulting grammar can easily lead to a published paper (see below)
+ contribution to the multilingual and multicultural development of Europe and the
world
+ free trip and stay in Gothenburg (for travel grant students)
==More information==
[Course Google Group http://groups.google.com/group/gf-resource-school-2009/]
[GF web page http://digitalgrammars.com/gf/]
[GF tutorial http://digitalgrammars.com/gf/doc/gf-tutorial.html]
[GF resource synopsis http://digitalgrammars.com/gf/lib/resource/doc/synopsis.html]
[Resource-HOWTO document http://digitalgrammars.com/gf/doc/Resource-HOWTO.html]
===Contact===
Håkan Burden: burden at chalmers se
Aarne Ranta: aarne at chalmers se
===Selected publications from earlier resource grammar projects===
K. Angelov.
Type-Theoretical Bulgarian Grammar.
In B. Nordström and A. Ranta (eds),
//Advances in Natural Language Processing (GoTAL 2008)//,
LNCS/LNAI 5221, Springer,
2008.
B. Bringert.
//Programming Language Techniques for Natural Language Applications//.
Phd thesis, Computer Science, University of Gothenburg,
2008.
A. El Dada and A. Ranta.
Implementing an Open Source Arabic Resource Grammar in GF.
In M. Mughazy (ed),
//Perspectives on Arabic Linguistics XX. Papers from the Twentieth Annual Symposium on Arabic Linguistics, Kalamazoo, March 26//
John Benjamins Publishing Company.
2007.
A. El Dada.
Implementation of the Arabic Numerals and their Syntax in GF.
Computational Approaches to Semitic Languages: Common Issues and Resources,
ACL-2007 Workshop,
June 28, 2007, Prague.
2007.
H. Hammarström and A. Ranta.
Cardinal Numerals Revisited in GF.
//Workshop on Numerals in the World's Languages//.
Dept. of Linguistics Max Planck Institute for Evolutionary Anthropology, Leipzig,
2004.
M. Humayoun, H. Hammarström, and A. Ranta.
Urdu Morphology, Orthography and Lexicon Extraction.
//CAASL-2: The Second Workshop on Computational Approaches to Arabic Script-based Languages//,
July 21-22, 2007, LSA 2007 Linguistic Institute, Stanford University.
2007.
K. Johannisson.
//Formal and Informal Software Specifications.//
Phd thesis, Computer Science, University of Gothenburg,
2005.
J. Khegai.
GF parallel resource grammars and Russian.
In proceedings of ACL2006
(The joint conference of the International Committee on Computational
Linguistics and the Association for Computational Linguistics) (pp. 475-482),
Sydney, Australia, July 2006.
J. Khegai.
//Language engineering in Grammatical Framework (GF)//.
Phd thesis, Computer Science, Chalmers University of Technology,
2006.
W. Ng'ang'a.
Multilingual content development for eLearning in Africa.
eLearning Africa: 1st Pan-African Conference on ICT for Development,
Education and Training. 24-26 May 2006, Addis Ababa, Ethiopia.
2006.
N. Perera and A. Ranta.
Dialogue System Localization with the GF Resource Grammar Library.
//SPEECHGRAM 2007: ACL Workshop on Grammar-Based Approaches to Spoken Language Processing//,
June 29, 2007, Prague.
2007.
A. Ranta.
Modular Grammar Engineering in GF.
//Research on Language and Computation//,
5:133-158, 2007.
A. Ranta.
How predictable is Finnish morphology? An experiment on lexicon construction.
In J. Nivre, M. Dahllöf and B. Megyesi (eds),
//Resourceful Language Technology: Festschrift in Honor of Anna Sågvall Hein//,
University of Uppsala,
2008.
A. Ranta. Grammars as Software Libraries.
To appear in
Y. Bertot, G. Huet, J-J. Lévy, and G. Plotkin (eds.),
//From Semantics to Computer Science//,
Cambridge University Press, Cambridge, 2009.
A. Ranta and K. Angelov.
Implementing Controlled Languages in GF.
To appear in the proceedings of //CNL 2009//.

View File

@@ -1,73 +0,0 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
<HTML>
<HEAD>
<META NAME="generator" CONTENT="http://txt2tags.sf.net">
<TITLE>GF 3.0</TITLE>
</HEAD><BODY BGCOLOR="white" TEXT="black">
<P ALIGN="center"><CENTER><H1>GF 3.0</H1>
<FONT SIZE="4">
<I>Krasimir Angelov, Björn Bringert, and Aarne Ranta</I><BR>
Beta release, 27 June 2008
</FONT></CENTER>
<P>
GF Version 3.0 is a major revision of GF. The source language is a superset of the
language in 2.9, which means backward compatibility. But the target languages, the
compiler implementation, and the functionalities (e.g. the shell) have undergone
radical changes.
</P>
<H2>New features</H2>
<P>
Here is a summary of the main novelties visible to the user:
</P>
<UL>
<LI><B>Size</B>: the source code and the executable binary size have gone
down to about the half of 2.9.
<LI><B>Portability</B>: the new back end format PGF (Portable Grammar Format) is
much simpler than the old GFC format, and therefore easier to port to new
platforms.
<LI><B>Multilingual web page support</B>: as an example of portability, GF 3.0 provides a
compiler from PGF to JavaScript. There are also JavaScript libraries for creating
translators and syntax editors as client-side web applications.
<LI><B>Incremental parsing</B>: there is a possibility of word completion when
input strings are sent to the parser.
<LI><B>Application programmer's interfaces</B>: both source-GF and PGF formats,
the shell, and the compiler are accessible via high-level APIs.
<LI><B>Resource library version 1.4</B>: more coverage, more languages; some of
the new GF language features are exploited.
<LI><B>Uniform character encoding</B>: UTF8 in generated files, user-definable in
source files
</UL>
<H2>Non-supported features</H2>
<P>
There are some features of GF 2.9 that will <I>not</I> work in the 3.0 beta release.
</P>
<UL>
<LI>Java Editor GUI: we now see the JavaScript editor as the main form of
syntax editing.
<LI>Pre-module multi-file grammar format: the grammar format of GF before version 2.0
is still not yet supported.
<LI>Context-free and EBNF input grammar formats.
<LI>Probabilistic GF grammars.
<LI>Some output formats: LBNF.
<LI>Some GF shell commands: while the main ones will be supported with their familiar
syntax and options, some old commands have not been included. The GF shell
command <CODE>help -changes</CODE> gives the actual list.
</UL>
<P>
Users who want to have these features are welcome to contact us,
and even more welcome to contribute code that restores them!
</P>
<H2>GF language extensions</H2>
<P>
Operations for defining patterns.
</P>
<P>
Inheritance of overload groups.
</P>
<!-- html code generated by txt2tags 2.4 (http://txt2tags.sf.net) -->
<!-- cmdline: txt2tags -thtml doc/gf3-release.txt -->
</BODY></HTML>

View File

@@ -1,58 +0,0 @@
GF 3.0
Krasimir Angelov, Björn Bringert, and Aarne Ranta
Beta release, 27 June 2008
GF Version 3.0 is a major revision of GF. The source language is a superset of the
language in 2.9, which means backward compatibility. But the target languages, the
compiler implementation, and the functionalities (e.g. the shell) have undergone
radical changes.
==New features==
Here is a summary of the main novelties visible to the user:
- **Size**: the source code and the executable binary size have gone
down to about the half of 2.9.
- **Portability**: the new back end format PGF (Portable Grammar Format) is
much simpler than the old GFC format, and therefore easier to port to new
platforms.
- **Multilingual web page support**: as an example of portability, GF 3.0 provides a
compiler from PGF to JavaScript. There are also JavaScript libraries for creating
translators and syntax editors as client-side web applications.
- **Incremental parsing**: there is a possibility of word completion when
input strings are sent to the parser.
- **Application programmer's interfaces**: both source-GF and PGF formats,
the shell, and the compiler are accessible via high-level APIs.
- **Resource library version 1.4**: more coverage, more languages; some of
the new GF language features are exploited.
- **Uniform character encoding**: UTF8 in generated files, user-definable in
source files
==Non-supported features==
There are some features of GF 2.9 that will //not// work in the 3.0 beta release.
- Java Editor GUI: we now see the JavaScript editor as the main form of
syntax editing.
- Pre-module multi-file grammar format: the grammar format of GF before version 2.0
is still not yet supported.
- Context-free and EBNF input grammar formats.
- Probabilistic GF grammars.
- Some output formats: LBNF.
- Some GF shell commands: while the main ones will be supported with their familiar
syntax and options, some old commands have not been included. The GF shell
command ``help -changes`` gives the actual list.
Users who want to have these features are welcome to contact us,
and even more welcome to contribute code that restores them!
==GF language extensions==
Operations for defining patterns.
Inheritance of overload groups.

View File

@@ -1,106 +0,0 @@
// Language coverage map for the GF Resource Grammar Library:
// one node per concrete language, all linked to the shared abstract syntax.
// Node colour: green = implemented, orange = in progress, red = wanted.
// Node style: solid = official EU language, dotted = non-EU language.
graph{
size = "8,8" ;
overlap = scale ;
"Abs" [label = "Abstract Syntax", style = "solid", shape = "rectangle"] ;
"1" [label = "Bulgarian", style = "solid", shape = "ellipse", color = "green"] ;
"1" -- "Abs" [style = "solid"];
"2" [label = "Czech", style = "solid", shape = "ellipse", color = "red"] ;
"2" -- "Abs" [style = "solid"];
"3" [label = "Danish", style = "solid", shape = "ellipse", color = "green"] ;
"3" -- "Abs" [style = "solid"];
"4" [label = "German", style = "solid", shape = "ellipse", color = "green"] ;
"4" -- "Abs" [style = "solid"];
"5" [label = "Estonian", style = "solid", shape = "ellipse", color = "red"] ;
"5" -- "Abs" [style = "solid"];
"6" [label = "Greek", style = "solid", shape = "ellipse", color = "red"] ;
"6" -- "Abs" [style = "solid"];
"7" [label = "English", style = "solid", shape = "ellipse", color = "green"] ;
"7" -- "Abs" [style = "solid"];
"8" [label = "Spanish", style = "solid", shape = "ellipse", color = "green"] ;
"8" -- "Abs" [style = "solid"];
"9" [label = "French", style = "solid", shape = "ellipse", color = "green"] ;
"9" -- "Abs" [style = "solid"];
"10" [label = "Italian", style = "solid", shape = "ellipse", color = "green"] ;
"10" -- "Abs" [style = "solid"];
"11" [label = "Latvian", style = "solid", shape = "ellipse", color = "red"] ;
"11" -- "Abs" [style = "solid"];
// NOTE(review): edge direction flips to "Abs" -- "N" from here on;
// harmless in an undirected graph, kept as-is.
"12" [label = "Lithuanian", style = "solid", shape = "ellipse", color = "red"] ;
"Abs" -- "12" [style = "solid"];
"13" [label = "Irish", style = "solid", shape = "ellipse", color = "red"] ;
"Abs" -- "13" [style = "solid"];
"14" [label = "Hungarian", style = "solid", shape = "ellipse", color = "red"] ;
"Abs" -- "14" [style = "solid"];
"15" [label = "Maltese", style = "solid", shape = "ellipse", color = "red"] ;
"Abs" -- "15" [style = "solid"];
"16" [label = "Dutch", style = "solid", shape = "ellipse", color = "red"] ;
"Abs" -- "16" [style = "solid"];
"17" [label = "Polish", style = "solid", shape = "ellipse", color = "orange"] ;
"Abs" -- "17" [style = "solid"];
"18" [label = "Portuguese", style = "solid", shape = "ellipse", color = "red"] ;
"Abs" -- "18" [style = "solid"];
"19" [label = "Slovak", style = "solid", shape = "ellipse", color = "red"] ;
"Abs" -- "19" [style = "solid"];
"20" [label = "Slovene", style = "solid", shape = "ellipse", color = "red"] ;
"Abs" -- "20" [style = "solid"];
"21" [label = "Romanian", style = "solid", shape = "ellipse", color = "orange"] ;
"Abs" -- "21" [style = "solid"];
"22" [label = "Finnish", style = "solid", shape = "ellipse", color = "green"] ;
"Abs" -- "22" [style = "solid"];
"23" [label = "Swedish", style = "solid", shape = "ellipse", color = "green"] ;
"Abs" -- "23" [style = "solid"];
// Non-EU languages (dotted) start here.
"24" [label = "Catalan", style = "dotted", shape = "ellipse", color = "green"] ;
"Abs" -- "24" [style = "solid"];
"25" [label = "Norwegian", style = "dotted", shape = "ellipse", color = "green"] ;
"Abs" -- "25" [style = "solid"];
"26" [label = "Russian", style = "dotted", shape = "ellipse", color = "green"] ;
"Abs" -- "26" [style = "solid"];
"27" [label = "Interlingua", style = "dotted", shape = "ellipse", color = "green"] ;
"Abs" -- "27" [style = "solid"];
"28" [label = "Latin", style = "dotted", shape = "ellipse", color = "orange"] ;
"Abs" -- "28" [style = "solid"];
"29" [label = "Turkish", style = "dotted", shape = "ellipse", color = "orange"] ;
"Abs" -- "29" [style = "solid"];
"30" [label = "Hindi", style = "dotted", shape = "ellipse", color = "orange"] ;
"Abs" -- "30" [style = "solid"];
"31" [label = "Thai", style = "dotted", shape = "ellipse", color = "orange"] ;
"Abs" -- "31" [style = "solid"];
"32" [label = "Urdu", style = "dotted", shape = "ellipse", color = "orange"] ;
"Abs" -- "32" [style = "solid"];
"33" [label = "Telugu", style = "dotted", shape = "ellipse", color = "red"] ;
"Abs" -- "33" [style = "solid"];
"34" [label = "Arabic", style = "dotted", shape = "ellipse", color = "orange"] ;
"Abs" -- "34" [style = "solid"];
}

Binary file not shown.

Before

Width:  |  Height:  |  Size: 129 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 439 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.8 MiB

View File

@@ -1,46 +0,0 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
<HTML>
<HEAD>
<META NAME="generator" CONTENT="http://txt2tags.sf.net">
<TITLE>Library-Based Grammar Engineering</TITLE>
</HEAD><BODY BGCOLOR="white" TEXT="black">
<P ALIGN="center"><CENTER><H1>Library-Based Grammar Engineering</H1>
<FONT SIZE="4">
<I>VR Project 2006-2008</I><BR>
</FONT></CENTER>
<H1>Staff</H1>
<P>
Lars Borin (co-leader)
</P>
<P>
Robin Cooper (co-leader)
</P>
<P>
Aarne Ranta (project responsible)
</P>
<P>
Sibylle Schupp (co-leader)
</P>
<H1>Publications</H1>
<P>
Ali El Dada, MSc Thesis
</P>
<P>
Muhammad Humayoun, MSc Thesis
</P>
<P>
Janna Khegai,
Language Engineering in GF, PhD Thesis, Chalmers. 2006.
</P>
<H1>Links</H1>
<P>
<A HREF="http://www.cs.chalmers.se/~aarne/GF/">GF</A>
</P>
<P>
<A HREF="http://www.cs.chalmers.se/~markus/FM/">Functional Morphology</A>
</P>
<!-- html code generated by txt2tags 2.0 (http://txt2tags.sf.net) -->
<!-- cmdline: txt2tags -thtml vr.txt -->
</BODY></HTML>

View File

@@ -1,32 +0,0 @@
Library-Based Grammar Engineering
VR Project 2006-2008
=Staff=
Lars Borin (co-leader)
Robin Cooper (co-leader)
Aarne Ranta (project responsible)
Sibylle Schupp (co-leader)
=Publications=
Ali El Dada, MSc Thesis
Muhammad Humayoun, MSc Thesis
Janna Khegai,
Language Engineering in GF, PhD Thesis, Chalmers. 2006.
=Links=
[GF http://www.cs.chalmers.se/~aarne/GF/]
[Functional Morphology http://www.cs.chalmers.se/~markus/FM/]

View File

@@ -1,136 +0,0 @@
module Main where
import PGF.Editor
import PGF
import Data.Char
import System (getArgs)
-- a rough editor shell using the PGF.Editor API
-- compile:
-- cd .. ; ghc --make exper/EditShell.hs
-- use:
-- EditShell file.pgf
-- Entry point: load the PGF grammar named on the command line, build an
-- editing dictionary and an empty state in the start category, and run
-- the full interactive editor loop.
main = do
  putStrLn "Hi, I'm the Editor! Type h for help on commands."
  file:_ <- getArgs
  pgf <- readPGF file
  let dict = pgf2dict pgf
      st0  = new (startCat pgf)
      lang = head (languages pgf) ---- for printnames; enable choosing lang
  editLoop pgf dict lang st0        -- alt 1: all editing commands
  -- dialogueLoop pgf dict lang st0 -- alt 2: just refinement by parsing (see bottom)
-- One step of the interactive editor: print a prompt describing the
-- current focus, read a command line, interpret it, and recurse with
-- the updated state.
editLoop :: PGF -> Dict -> Language -> State -> IO State
editLoop pgf dict lang st = do
  putStrLn prompt
  cmd <- getLine
  st' <- interpret pgf dict st cmd
  editLoop pgf dict lang st'
  where
    -- What to ask depends on whether any metavariables remain and
    -- whether the focus is on one of them.
    prompt
      | null (allMetas st) =
          unlines (["The tree is complete:", prState st]
                   ++ linearizeAll pgf (stateTree st))
      | isMetaFocus st =
          "I want something of type " ++ showType (focusType st) ++
          " (0 - " ++ show (length (refineMenu dict st) - 1) ++ ")"
      | otherwise = "Do you want to change this node?"
-- Interpret one editor command line against the current state and
-- return the (possibly unchanged) new state. Commands are listed in
-- commandHelp: r = refine by function name, p = parse text, a = random
-- refinement, d = delete, m = show menu, a bare number = pick menu
-- item, [..] = go to a position, > = next node, x = list open metas,
-- h = help.
interpret :: PGF -> Dict -> State -> String -> IO State
interpret pgf dict st c = case words c of
  -- refine the focused meta with the named function, then advance
  "r":f:_ -> do
    let st' = goNextMeta (refine dict (mkCId f) st)
    prLState pgf st'
    return st'
  -- parse everything after "p" in the focus category and refine with it;
  -- dropWhile keeps the raw text (with its spacing) rather than ws
  "p":ws -> do
    let tts = parseAll pgf (focusType st) (dropWhile (not . isSpace) c)
    st' <- selectReplace dict (concat tts) st
    prLState pgf st'
    return st'
  -- fill the focus with a randomly generated tree of the right type
  "a":_ -> do
    t:_ <- generateRandom pgf (focusType st)
    let st' = goNextMeta (replace dict t st)
    prLState pgf st'
    return st'
  -- delete the focused subtree
  "d":_ -> do
    let st' = delete st
    prLState pgf st'
    return st'
  -- show the refinement menu for the focus
  "m":_ -> do
    putStrLn (unwords (map prCId (refineMenu dict st)))
    return st
  -- a bare number picks that item from the refinement menu
  -- NOTE(review): read/!! crash on an out-of-range index — confirm intent
  d : _ | all isDigit d -> do
    let f = refineMenu dict st !! read d
    let st' = goNextMeta (refine dict f st)
    prLState pgf st'
    return st'
  -- a literal position list such as [1,2,3] moves the focus there
  p@('[':_):_ -> do
    let st' = goPosition (mkPosition (read p)) st
    prLState pgf st'
    return st'
  -- move the focus to the next node
  ">":_ -> do
    let st' = goNext st
    prLState pgf st'
    return st'
  -- list all unfilled metavariables with their positions and types
  "x":_ -> do
    mapM_ putStrLn [show (showPosition p) ++ showType t | (p,t) <- allMetas st]
    return st
  -- print the command summary
  "h":_ -> putStrLn commandHelp >> return st
  -- anything else (including empty input) leaves the state unchanged
  _ -> do
    putStrLn "command not understood"
    return st
-- | Print the current abstract tree followed by its linearizations
-- in every language of the grammar.
prLState pgf st =
  putStrLn . unlines $
    ["Now I have:", "", prState st] ++ linearizeAll pgf (stateTree st)
-- | Prompt selection from a list of trees, such as an ambiguous parse.
-- Empty list: report "no results" and keep the state.  Singleton: use it
-- directly.  Several: number the candidates, read an index from the user
-- and replace the focus with the chosen tree.  Input that is not a
-- number in range keeps the state unchanged instead of crashing
-- (the original used `ts !! read d` unchecked).
selectReplace :: Dict -> [Tree] -> State -> IO State
selectReplace dict ts st = case ts of
  []  -> putStrLn "no results" >> return st
  [t] -> return $ goNextMeta $ replace dict t st
  _   -> do
    mapM_ putStrLn $ "choose tree by entering its number:" :
      [show i ++ " : " ++ showTree t | (i,t) <- zip [0..] ts]
    d <- getLine
    if not (null d) && all isDigit d && read d < length ts
      then return $ goNextMeta $ replace dict (ts !! read d) st
      else putStrLn "no such tree" >> return st
-- | Help text listing every editor command, one per line.
commandHelp = concatMap (++ "\n")
  [ "a -- refine with a random subtree"
  , "d -- delete current subtree"
  , "h -- display this help message"
  , "m -- show refinement menu"
  , "p Anything -- parse Anything and refine with it"
  , "r Function -- refine with Function"
  , "x -- show all unknown positions and their types"
  , "4 -- refine with 4th item from menu (see m)"
  , "[1,2,3] -- go to position 1,2,3"
  , "> -- go to next node"
  ]
----------------
-- for a dialogue system, working just by parsing; questions are cat printnames
----------------

-- | Dialogue variant of the main loop: the prompt is the printname of
-- the focused category, and input is interpreted purely by parsing
-- (see 'interpretD').
dialogueLoop :: PGF -> Dict -> Language -> State -> IO State
dialogueLoop pgf dict lang st = do
  putStrLn question
  answer <- getLine
  next   <- interpretD pgf dict st answer
  dialogueLoop pgf dict lang next
  where
    question
      | null (allMetas st) =
          "Ready!\n " ++ unlines (linearizeAll pgf (stateTree st))
      | isMetaFocus st = showPrintName pgf lang (focusType st)
      | otherwise      = "Do you want to change this node?"
-- | Dialogue-mode interpretation: parse the whole input line in the type
-- of the focused metavariable and replace the focus with the result,
-- letting 'selectReplace' resolve ambiguous parses interactively.
interpretD :: PGF -> Dict -> State -> String -> IO State
interpretD pgf dict st input =
  selectReplace dict (concat (parseAll pgf (focusType st) input)) st
  -- prLState pgf was called on the result here once; re-enable to debug

View File

@@ -1,461 +0,0 @@
----------------------------------------------------------------------
-- |
-- Module : Evaluate
-- Maintainer : AR
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/11/01 15:39:12 $
-- > CVS $Author: aarne $
-- > CVS $Revision: 1.19 $
--
-- Computation of source terms. Used in compilation and in @cc@ command.
-----------------------------------------------------------------------------
module GF.Compile.Evaluate (appEvalConcrete) where
import GF.Data.Operations
import GF.Grammar.Grammar
import GF.Infra.Ident
import GF.Data.Str
import GF.Grammar.PrGrammar
import GF.Infra.Modules
import GF.Infra.Option
import GF.Grammar.Macros
import GF.Grammar.Lookup
import GF.Grammar.Refresh
import GF.Grammar.PatternMatch
import GF.Grammar.Lockfield (isLockLabel) ----
import GF.Grammar.AppPredefined
import qualified Data.Map as Map
import Data.List (nub,intersperse)
import Control.Monad (liftM2, liftM)
import Debug.Trace
-- | Evaluation environment threaded through the STM state monad:
-- a cache of already-computed resource constants keyed by
-- (module, identifier), plus a counter for generating fresh variables
-- in 'fterm2term'.
data EEnv = EEnv {
computd :: Map.Map (Ident,Ident) FTerm,
temp :: Int
}

-- | Initial environment: empty cache, counter at zero.
emptyEEnv = EEnv Map.empty 0

-- | Look up the cached computed form of a qualified constant, if any.
lookupComputed :: (Ident,Ident) -> STM EEnv (Maybe FTerm)
lookupComputed mc = do
env <- readSTM
return $ Map.lookup mc $ computd env

-- | Record the computed form of a qualified constant in the cache.
updateComputed :: (Ident,Ident) -> FTerm -> STM EEnv ()
updateComputed mc t = updateSTM (\e -> e{computd = Map.insert mc t (computd e)})

-- | Generate a fresh identifier #0, #1, ... and bump the counter.
-- Presumably '#' cannot occur in user-written identifiers, so these
-- never clash -- TODO confirm against the Ident lexer.
getTemp :: STM EEnv Ident
getTemp = do
env <- readSTM
updateSTM (\e -> e{temp = temp e + 1})
return $ identC ("#" ++ show (temp env))
-- | A term in "functional form": either a plain constructed term (FTC)
-- or a Haskell function awaiting an argument (FTF).  Abstractions are
-- turned into FTF so that application is plain Haskell application.
data FTerm =
FTC Term
| FTF (Term -> FTerm)

-- | Debug printer for FTerm; the Integer supplies numbered dummy
-- arguments (as EInt) to show the bodies of FTF functions.
prFTerm :: Integer -> FTerm -> String
prFTerm i t = case t of
FTC t -> prt t
FTF f -> show i +++ "->" +++ prFTerm (i + 1) (f (EInt i))

-- | Convert a term to functional form: an abstraction becomes a
-- Haskell function that substitutes its argument into the body.
term2fterm t = case t of
Abs x b -> FTF (\t -> term2fterm (subst [(x,t)] b))
_ -> FTC t

-- | Debug hook; currently the identity (trace call commented out).
traceFTerm c ft = ft ----trace ("\n" ++ prt c +++ "=" +++ take 60 (prFTerm 0 ft)) ft

-- | Convert back from functional form, inventing fresh variables
-- (via 'getTemp') for the arguments of FTF functions.
fterm2term :: FTerm -> STM EEnv Term
fterm2term t = case t of
FTC t -> return t
FTF f -> do
x <- getTemp
b <- fterm2term $ f (Vr x)
return $ Abs x b

-- | Capture-unaware substitution of variables by terms.
subst g t = case t of
Vr x -> maybe t id $ lookup x g
_ -> composSafeOp (subst g) t

-- | Apply a functional-form term to a spine of arguments, stopping
-- when either the arguments or the FTF layers run out.
appFTerm :: FTerm -> [Term] -> FTerm
appFTerm ft ts = case (ft,ts) of
(FTF f, x:xs) -> appFTerm (f x) xs
_ -> ft
{-
(FTC _, []) -> ft
(FTC f, [a]) -> case appPredefined (App f a) of
Ok (t,_) -> FTC t
_ -> error $ "error: appFTerm" +++ prFTerm 0 ft +++ unwords (map prt ts)
_ -> error $ "error: appFTerm" +++ prFTerm 0 ft +++ unwords (map prt ts)
-}

-- | Split a (possibly nested) application into head and argument spine.
apps :: Term -> (Term,[Term])
apps t = case t of
App f a -> (f',xs ++ [a]) where (f',xs) = apps f
_ -> (t,[])
-- | Run 'evalConcrete' from an empty environment and drop the final
-- environment, keeping only the evaluated judgement tree.
appEvalConcrete gr bt = liftM fst $ appSTM (evalConcrete gr bt) emptyEEnv

-- | Partially evaluate every linearization rule (CncFun with a known
-- type and a Yes definition) in a module's judgement tree; all other
-- judgements pass through unchanged.  Errors are prefixed with the
-- name of the offending function via 'evalIn'.
evalConcrete :: SourceGrammar -> BinTree Ident Info -> STM EEnv (BinTree Ident Info)
evalConcrete gr mo = mapMTree evaldef mo where
evaldef (f,info) = case info of
CncFun (mt@(Just (_,ty@(cont,val)))) pde ppr ->
evalIn ("\nerror in linearization of function" +++ prt f +++ ":") $
do
pde' <- case pde of
Yes de -> do
liftM yes $ pEval ty de
_ -> return pde
--- ppr' <- liftM yes $ evalPrintname gr c ppr pde'
return $ (f, CncFun mt pde' ppr) -- only cat in type actually needed
_ -> return (f,info)
-- Partially evaluate a linearization body: apply it to its context
-- variables, eta-expand records against the value type, compute, and
-- re-abstract over the same variables.
pEval (context,val) trm = do ---- errIn ("parteval" +++ prt_ trm) $ do
let
vars = map fst context
args = map Vr vars
subst = [(v, Vr v) | v <- vars]
trm1 = mkApp trm args
trm3 <- recordExpand val trm1 >>= comp subst
return $ mkAbs vars trm3

-- Eta-expand a term of record type into an explicit record of
-- projections; a free variation of records is expanded pointwise so
-- that variants are not pushed inside the fields.
recordExpand typ trm = case unComputed typ of
RecType tys -> case trm of
FV rs -> return $ FV [R [assign lab (P r lab) | (lab,_) <- tys] | r <- rs]
_ -> return $ R [assign lab (P trm lab) | (lab,_) <- tys]
_ -> return trm
-- Compute a concrete-syntax term in environment g (an association list
-- from variables to values).  Dispatches on the head constructor; free
-- variation (FV) is distributed through every construct, named constants
-- are unfolded through the (module,ident) cache, and tables are
-- case-expanded when their parameter type is known.
-- FIX: in the ExtR case below, the right-operand Alias branch used to
-- rebuild the term with Glue (token gluing) -- a copy-paste from the
-- Glue case; it now correctly rebuilds with ExtR.
comp g t = case t of
-- Predef constants are left for later processing
Q (IC "Predef") _ -> trace ("\nPredef:\n" ++ prt t) $ return t
-- qualified constant: unfold via the cache of computed constants
Q p c -> do
md <- lookupComputed (p,c)
case md of
Nothing -> do
d <- lookRes (p,c)
updateComputed (p,c) $ traceFTerm c $ term2fterm d
return d
Just d -> fterm2term d >>= comp g
App f a -> case apps t of
-- saturated Predef application: delegate to appPredefined
(h@(Q p c),xs) | p == IC "Predef" -> do
xs' <- mapM (comp g) xs
(t',b) <- stmErr $ appPredefined (foldl App h xs')
if b then return t' else comp g t'
-- application of a named operation: apply its cached functional form
(h@(Q p c),xs) -> do
xs' <- mapM (comp g) xs
md <- lookupComputed (p,c)
case md of
Just ft -> do
t <- fterm2term $ appFTerm ft xs'
comp g t
Nothing -> do
d <- lookRes (p,c)
let ft = traceFTerm c $ term2fterm d
updateComputed (p,c) ft
t' <- fterm2term $ appFTerm ft xs'
comp g t'
-- general application: beta-reduce, distribute variants, or fall back
_ -> do
f' <- comp g f
a' <- comp g a
case (f',a') of
(Abs x b,_) -> comp (ext x a' g) b
(QC _ _,_) -> returnC $ App f' a'
(FV fs, _) -> mapM (\c -> comp g (App c a')) fs >>= return . variants
(_, FV as) -> mapM (\c -> comp g (App f' c)) as >>= return . variants
(Alias _ _ d, _) -> comp g (App d a')
(S (T i cs) e,_) -> prawitz g i (flip App a') cs e
_ -> do
(t',b) <- stmErr $ appPredefined (App f' a')
if b then return t' else comp g t'
-- variable: look up its value; recompute unless it maps to itself
Vr x -> do
t' <- maybe (prtRaise (
"context" +++ show g +++ ": no value given to variable") x) return $ lookup x g
case t' of
_ | t == t' -> return t
_ -> comp g t'
Abs x b -> do
b' <- comp (ext x (Vr x) g) b
return $ Abs x b'
Let (x,(_,a)) b -> do
a' <- comp g a
comp (ext x a' g) b
Prod x a b -> do
a' <- comp g a
b' <- comp (ext x (Vr x) g) b
return $ Prod x a' b'
P t l | isLockLabel l -> return $ R []
---- a workaround 18/2/2005: take this away and find the reason
---- why earlier compilation destroys the lock field
-- record projection: select from a record, distribute variants
P t l -> do
t' <- comp g t
case t' of
FV rs -> mapM (\c -> comp g (P c l)) rs >>= returnC . variants
R r -> maybe
(prtRaise (prt t' ++ ": no value for label") l) (comp g . snd) $
lookup l r
ExtR a (R b) -> case lookup l b of ----comp g (P (R b) l) of
Just (_,v) -> comp g v
_ -> comp g (P a l)
S (T i cs) e -> prawitz g i (flip P l) cs e
_ -> returnC $ P t' l
-- selection from a literal table: pattern-match the selector
S t@(T _ cc) v -> do
v' <- comp g v
case v' of
FV vs -> do
ts' <- mapM (comp g . S t) vs
return $ variants ts'
_ -> case matchPattern cc v' of
Ok (c,g') -> comp (g' ++ g) c
_ | isCan v' -> prtRaise ("missing case" +++ prt v' +++ "in") t
_ -> do
t' <- comp g t
return $ S t' v' -- if v' is not canonical
-- selection in general: compute both sides first
S t v -> do
t' <- comp g t
v' <- comp g v
case t' of
T _ [(PV IW,c)] -> comp g c --- an optimization
T _ [(PT _ (PV IW),c)] -> comp g c
T _ [(PV z,c)] -> comp (ext z v' g) c --- another optimization
T _ [(PT _ (PV z),c)] -> comp (ext z v' g) c
FV ccs -> mapM (\c -> comp g (S c v')) ccs >>= returnC . variants
-- course-of-values table: rebuild patterns from the parameter type
V ptyp ts -> do
vs <- stmErr $ allParamValues gr ptyp
ps <- stmErr $ mapM term2patt vs
let cc = zip ps ts
case v' of
FV vs -> mapM (\c -> comp g (S t' c)) vs >>= returnC . variants
_ -> case matchPattern cc v' of
Ok (c,g') -> comp (g' ++ g) c
_ | isCan v' -> prtRaise ("missing case" +++ prt v' +++ "in") t
_ -> return $ S t' v' -- if v' is not canonical
T _ cc -> case v' of
FV vs -> mapM (\c -> comp g (S t' c)) vs >>= returnC . variants
_ -> case matchPattern cc v' of
Ok (c,g') -> comp (g' ++ g) c
_ | isCan v' -> prtRaise ("missing case" +++ prt v' +++ "in") t
_ -> return $ S t' v' -- if v' is not canonical
Alias _ _ d -> comp g (S d v')
S (T i cs) e -> prawitz g i (flip S v') cs e
_ -> returnC $ S t' v'
-- normalize away empty tokens
K "" -> return Empty
-- glue if you can
Glue x0 y0 -> do
x <- comp g x0
y <- comp g y0
case (x,y) of
(Alias _ _ d, y) -> comp g $ Glue d y
(x, Alias _ _ d) -> comp g $ Glue x d
(S (T i cs) e, s) -> prawitz g i (flip Glue s) cs e
(s, S (T i cs) e) -> prawitz g i (Glue s) cs e
(_,Empty) -> return x
(Empty,_) -> return y
(K a, K b) -> return $ K (a ++ b)
(_, Alts (d,vs)) -> do
---- (K a, Alts (d,vs)) -> do
let glx = Glue x
comp g $ Alts (glx d, [(glx v,c) | (v,c) <- vs])
(Alts _, ka) -> checks [do
y' <- stmErr $ strsFromTerm ka
---- (Alts _, K a) -> checks [do
x' <- stmErr $ strsFromTerm x -- this may fail when compiling opers
return $ variants [
foldr1 C (map K (str2strings (glueStr v u))) | v <- x', u <- y']
---- foldr1 C (map K (str2strings (glueStr v (str a)))) | v <- x']
,return $ Glue x y
]
(FV ks,_) -> do
kys <- mapM (comp g . flip Glue y) ks
return $ variants kys
(_,FV ks) -> do
xks <- mapM (comp g . Glue x) ks
return $ variants xks
_ -> do
mapM_ checkNoArgVars [x,y]
r <- composOp (comp g) t
returnC r
Alts _ -> do
r <- composOp (comp g) t
returnC r
-- remove empty
C a b -> do
a' <- comp g a
b' <- comp g b
case (a',b') of
(Alts _, K a) -> checks [do
as <- stmErr $ strsFromTerm a' -- this may fail when compiling opers
return $ variants [
foldr1 C (map K (str2strings (plusStr v (str a)))) | v <- as]
,
return $ C a' b'
]
(Empty,_) -> returnC b'
(_,Empty) -> returnC a'
_ -> returnC $ C a' b'
-- reduce free variation as much as you can
FV ts -> mapM (comp g) ts >>= returnC . variants
-- merge record extensions if you can
ExtR r s -> do
r' <- comp g r
s' <- comp g s
case (r',s') of
(Alias _ _ d, _) -> comp g $ ExtR d s'
(_, Alias _ _ d) -> comp g $ ExtR r' d -- was: Glue r' d (copy-paste bug)
(R rs, R ss) -> stmErr $ plusRecord r' s'
(RecType rs, RecType ss) -> stmErr $ plusRecType r' s'
_ -> return $ ExtR r' s'
-- case-expand tables
-- if already expanded, don't expand again
T i@(TComp _) cs -> do
-- if there are no variables, don't even go inside
cs' <- if (null g) then return cs else mapPairsM (comp g) cs
return $ T i cs'
--- this means some extra work; should implement TSh directly
TSh i cs -> comp g $ T i [(p,v) | (ps,v) <- cs, p <- ps]
T i cs -> do
pty0 <- stmErr $ getTableType i
ptyp <- comp g pty0
case allParamValues gr ptyp of
Ok vs -> do
cs' <- mapM (compBranchOpt g) cs
sts <- stmErr $ mapM (matchPattern cs') vs
ts <- mapM (\ (c,g') -> comp (g' ++ g) c) sts
ps <- stmErr $ mapM term2patt vs
let ps' = ps --- PT ptyp (head ps) : tail ps
return $ --- V ptyp ts -- to save space, just course of values
T (TComp ptyp) (zip ps' ts)
_ -> do
cs' <- mapM (compBranch g) cs
return $ T i cs' -- happens with variable types
-- otherwise go ahead
_ -> composOp (comp g) t >>= returnC
-- Look up a resource definition; definitions of kind 0 are computed in
-- an empty environment, others returned as-is.  Modules compiled with
-- the "noexpand" optimizer flag are never unfolded.
lookRes (p,c) = case lookupResDefKind gr p c of
Ok (t,_) | noExpand p -> return t
Ok (t,0) -> comp [] t
Ok (t,_) -> return t
Bad s -> raise s

-- True when module p carries the "noexpand" optimizer option.
noExpand p = errVal False $ do
mo <- lookupModule gr p
return $ case getOptVal (iOpts (flags mo)) useOptimizer of
Just "noexpand" -> True
_ -> False

-- Raise an error message followed by the printed term.
prtRaise s t = raise (s +++ prt t)

-- Extend an environment with one binding.
ext x a g = (x,a):g

returnC = return --- . computed

-- Collapse a list of alternatives: a single (deduplicated) value is
-- returned bare, several become a free variation.
variants ts = case nub ts of
[t] -> t
ts -> FV ts

-- Whether a value is canonical, i.e. safe to report a missing table
-- case for (rather than leaving the selection symbolic).
isCan v = case v of
Con _ -> True
QC _ _ -> True
App f a -> isCan f && isCan a
R rs -> all (isCan . snd . snd) rs
_ -> False

-- Compute one table branch, binding the branch pattern's variables
-- to themselves in the environment.
compBranch g (p,v) = do
let g' = contP p ++ g
v' <- comp g' v
return (p,v')

-- Like compBranch, but skip branches whose pattern binds no variables.
compBranchOpt g c@(p,v) = case contP p of
[] -> return c
_ -> compBranch g c
---- _ -> err (const (return c)) return $ compBranch g c

-- Variables bound by a pattern, each mapped to itself.
contP p = case p of
PV x -> [(x,Vr x)]
PC _ ps -> concatMap contP ps
PP _ _ ps -> concatMap contP ps
PT _ p -> contP p
PR rs -> concatMap (contP . snd) rs
PAs x p -> (x,Vr x) : contP p
PSeq p q -> concatMap contP [p,q]
PAlt p q -> concatMap contP [p,q]
PRep p -> contP p
PNeg p -> contP p
_ -> []

-- Push an operation f inside every branch of a table selection
-- ("Prawitz-style" distribution of context into cases).
prawitz g i f cs e = do
cs' <- mapM (compBranch g) [(p, f v) | (p,v) <- cs]
return $ S (T i cs') e
-- | argument variables cannot be glued
-- Raises an error if the term contains a run-time (argument) variable;
-- used by the Glue case of 'comp' before gluing symbolically.
checkNoArgVars :: Term -> STM EEnv Term
checkNoArgVars t = case t of
Vr (IA _) -> raise $ glueErrorMsg $ prt t
Vr (IAV _) -> raise $ glueErrorMsg $ prt t
_ -> composOp checkNoArgVars t

-- Error text for the check above.
glueErrorMsg s =
"Cannot glue (+) term with run-time variable" +++ s ++ "." ++++
"Use Prelude.bind instead."

-- | Lift a pure Err computation into the state monad, leaving the
-- state unchanged.
stmErr :: Err a -> STM s a
stmErr e = stm (\s -> do
v <- e
return (v,s)
)

-- | Run a stateful computation, prefixing any failure with msg.
evalIn :: String -> STM s a -> STM s a
evalIn msg st = stm $ \s -> case appSTM st s of
Bad e -> Bad $ msg ++++ e
Ok vs -> Ok vs

View File

@@ -1,273 +0,0 @@
----------------------------------------------------------------------
-- |
-- Module : Optimize
-- Maintainer : AR
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/09/16 13:56:13 $
-- > CVS $Author: aarne $
-- > CVS $Revision: 1.18 $
--
-- Top-level partial evaluation for GF source modules.
-----------------------------------------------------------------------------
module GF.Compile.Optimize (optimizeModule) where
import GF.Grammar.Grammar
import GF.Infra.Ident
import GF.Infra.Modules
import GF.Grammar.PrGrammar
import GF.Grammar.Macros
import GF.Grammar.Lookup
import GF.Grammar.Refresh
import GF.Grammar.Compute
import GF.Compile.BackOpt
import GF.Compile.CheckGrammar
import GF.Compile.Update
import GF.Compile.Evaluate
import GF.Data.Operations
import GF.Infra.CheckM
import GF.Infra.Option
import Control.Monad
import Data.List
-- | partial evaluation of concrete syntax. AR 6\/2001 -- 16\/5\/2003 -- 5\/2\/2005.
-- only do this for resource: concrete is optimized in gfc form
-- First partially evaluates a complete resource module via 'evalModule',
-- then applies the sharing/parametrization optimization chosen by the
-- useOptimizer flag (module flags override the given options).
-- Non-resource or incomplete modules are only evaluated.
optimizeModule :: Options -> [(Ident,SourceModule)] -> (Ident,SourceModule) ->
Err (Ident,SourceModule)
optimizeModule opts ms mo@(_,mi) = case mi of
m0@(Module mt st fs me ops js) | st == MSComplete && isModRes m0 -> do
mo1 <- evalModule oopts ms mo
return $ case optim of
"parametrize" -> shareModule paramOpt mo1 -- parametrization and sharing
"values" -> shareModule valOpt mo1 -- tables as courses-of-values
"share" -> shareModule shareOpt mo1 -- sharing of branches
"all" -> shareModule allOpt mo1 -- first parametrize then values
"none" -> mo1 -- no optimization
_ -> mo1 -- none; default for src
_ -> evalModule oopts ms mo
where
oopts = addOptions opts (iOpts (flagsModule mo))
optim = maybe "all" id $ getOptVal oopts useOptimizer
-- | Evaluate the judgements of a complete concrete module: first
-- 'appEvalConcrete' over the whole judgement tree, then
-- 'evalCncInfo' per judgement.  All other modules pass through
-- unchanged.  (The commented-out case is the retired per-operation
-- evaluation of resource modules; evalOp and gr0 are its leftovers.)
evalModule :: Options -> [(Ident,SourceModule)] -> (Ident,SourceModule) -> Err (Ident,SourceModule)
evalModule oopts ms mo@(name,mod) = case mod of
m0@(Module mt st fs me ops js) | st == MSComplete -> case mt of
{-
-- now: don't optimize resource
_ | isModRes m0 -> do
let deps = allOperDependencies name js
ids <- topoSortOpers deps
MGrammar (mod' : _) <- foldM evalOp gr ids
return $ mod'
-}
MTConcrete a -> do
-----
js0 <- appEvalConcrete gr js
js' <- mapMTree (evalCncInfo oopts gr name a) js0 ---- <- gr0 6/12/2005
return $ (name, Module mt st fs me ops js')
_ -> return $ (name,mod)
_ -> return $ (name,mod)
where
-- grammar without / with the module being evaluated at the front
gr0 = MGrammar $ ms
gr = MGrammar $ (name,mod) : ms
evalOp g@(MGrammar ((_, m) : _)) i = do
info <- lookupTree prt i $ jments m
info' <- evalResInfo oopts gr (i,info)
return $ updateRes g name i info'
-- | only operations need be compiled in a resource, and this is local to each
-- definition since the module is traversed in topological order
-- NOTE(review): the non-optres alternative of comp (computeConcreteRec)
-- looks unreachable, since comp is only called under the optres guard
-- -- verify before relying on it.
evalResInfo :: Options -> SourceGrammar -> (Ident,Info) -> Err Info
evalResInfo oopts gr (c,info) = case info of
ResOper pty pde -> eIn "operation" $ do
pde' <- case pde of
Yes de | optres -> liftM yes $ comp de
_ -> return pde
return $ ResOper pty pde'
_ -> return info
where
comp = if optres then computeConcrete gr else computeConcreteRec gr
eIn cat = errIn ("Error optimizing" +++ cat +++ prt c +++ ":")
-- "noexpand" disables computation of operation bodies
optim = maybe "all" id $ getOptVal oopts useOptimizer
optres = case optim of
"noexpand" -> False
_ -> True
-- | Evaluate one concrete judgement: for a category, compute its
-- lincat default and printname; for a function, only the printname
-- (body evaluation was moved to Evaluate -- see the commented lines).
-- Other judgements pass through unchanged.
evalCncInfo ::
Options -> SourceGrammar -> Ident -> Ident -> (Ident,Info) -> Err (Ident,Info)
evalCncInfo opts gr cnc abs (c,info) = errIn ("optimizing" +++ prt c) $ case info of
CncCat ptyp pde ppr -> do
pde' <- case (ptyp,pde) of
-- user-given lindef: partially evaluate it
(Yes typ, Yes de) ->
liftM yes $ pEval ([(varStr, typeStr)], typ) de
-- no lindef: synthesize a default from the lincat
(Yes typ, Nope) ->
liftM yes $ mkLinDefault gr typ >>= partEval noOptions gr ([(varStr, typeStr)],typ)
(May b, Nope) ->
return $ May b
_ -> return pde -- indirection
-- category printname defaults to the printed category name
ppr' <- liftM yes $ evalPrintname gr c ppr (yes $ K $ prt c)
return (c, CncCat ptyp pde' ppr')
CncFun (mt@(Just (_,ty@(cont,val)))) pde ppr ->
eIn ("linearization in type" +++ prt (mkProd (cont,val,[])) ++++ "of function") $ do
pde' <- case pde of
----- Yes de -> do
----- liftM yes $ pEval ty de
_ -> return pde
ppr' <- liftM yes $ evalPrintname gr c ppr pde'
return $ (c, CncFun mt pde' ppr') -- only cat in type actually needed
_ -> return (c,info)
where
pEval = partEval opts gr
eIn cat = errIn ("Error optimizing" +++ cat +++ prt c +++ ":")
-- | the main function for compiling linearizations
-- Applies the term to its context variables, eta-expands against the
-- value type, computes, and re-abstracts.  With the -all option
-- (globalTable) the parameter dependencies are additionally factored
-- into one outer table via outCase.
partEval :: Options -> SourceGrammar -> (Context,Type) -> Term -> Err Term
partEval opts gr (context, val) trm = errIn ("parteval" +++ prt_ trm) $ do
let vars = map fst context
args = map Vr vars
subst = [(v, Vr v) | v <- vars]
trm1 = mkApp trm args
trm3 <- if globalTable
then etaExpand trm1 >>= comp subst >>= outCase subst
else etaExpand trm1 >>= comp subst
return $ mkAbs vars trm3
where
globalTable = oElem showAll opts --- i -all
comp g t = {- refreshTerm t >>= -} computeTerm gr g t
etaExpand t = recordExpand val t --- >>= caseEx -- done by comp
-- Wrap the term in a table over all parameter projections that
-- actually occur in it, then select with the same projections.
outCase subst t = do
pts <- getParams context
let (args,ptyps) = unzip $ filter (flip occur t . fst) pts
if null args
then return t
else do
let argtyp = RecType $ tuple2recordType ptyps
let pvars = map (Vr . zIdent . prt) args -- gets eliminated
patt <- term2patt $ R $ tuple2record $ pvars
let t' = replace (zip args pvars) t
t1 <- comp subst $ T (TTyped argtyp) [(patt, t')]
return $ S t1 $ R $ tuple2record args
--- notice: this assumes that all lin types follow the "old JFP style"
-- All non-lin (parameter) projections of the context variables.
getParams = liftM concat . mapM getParam
getParam (argv,RecType rs) = return
[(P (Vr argv) lab, ptyp) | (lab,ptyp) <- rs, not (isLinLabel lab)]
---getParam (_,ty) | ty==typeStr = return [] --- in lindef
getParam (av,ty) =
Bad ("record type expected not" +++ prt ty +++ "for" +++ prt av)
--- all lin types are rec types
--- all lin types are rec types
replace :: [(Term,Term)] -> Term -> Term
replace reps trm = case trm of
-- this is the important case
P _ _ -> maybe trm id $ lookup trm reps
_ -> composSafeOp (replace reps) trm
occur t trm = case trm of
-- this is the important case
P _ _ -> t == trm
S x y -> occur t y || occur t x
App f x -> occur t x || occur t f
Abs _ f -> occur t f
R rs -> any (occur t) (map (snd . snd) rs)
T _ cs -> any (occur t) (map snd cs)
C x y -> occur t x || occur t y
Glue x y -> occur t x || occur t y
ExtR x y -> occur t x || occur t y
FV ts -> any (occur t) ts
V _ ts -> any (occur t) ts
Let (_,(_,x)) y -> occur t x || occur t y
_ -> False
-- here we must be careful not to reduce
-- variants {{s = "Auto" ; g = N} ; {s = "Wagen" ; g = M}}
-- {s = variants {"Auto" ; "Wagen"} ; g = variants {N ; M}} ;
-- | Eta-expand a term of record type into an explicit record of
-- projections; free variation of records is expanded per alternative
-- (see the caveat above).  Non-record types pass through.
recordExpand :: Type -> Term -> Err Term
recordExpand typ trm = case unComputed typ of
RecType tys -> case trm of
FV rs -> return $ FV [R [assign lab (P r lab) | (lab,_) <- tys] | r <- rs]
_ -> return $ R [assign lab (P trm lab) | (lab,_) <- tys]
_ -> return trm
-- | auxiliaries for compiling the resource
-- Build the default linearization (lindef) for a lincat: strings become
-- the bound variable, parameters their first value, tables wildcards
-- over the default of their value type; fails for non-record lincats.
mkLinDefault :: SourceGrammar -> Type -> Err Term
mkLinDefault gr typ = do
case unComputed typ of
RecType lts -> mapPairsM mkDefField lts >>= (return . Abs varStr . R . mkAssign)
_ -> prtBad "linearization type must be a record type, not" typ
where
-- default value for one record field, by its type
mkDefField typ = case unComputed typ of
Table p t -> do
t' <- mkDefField t
let T _ cs = mkWildCases t'
return $ T (TWild p) cs
Sort "Str" -> return $ Vr varStr
QC q p -> lookupFirstTag gr q p
RecType r -> do
let (ls,ts) = unzip r
ts' <- mapM mkDefField ts
return $ R $ [assign l t | (l,t) <- zip ls ts']
_ | isTypeInts typ -> return $ EInt 0 -- exists in all as first val
_ -> prtBad "linearization type field cannot be" typ
-- | Form the printname: if given, compute. If not, use the computed
-- lin for functions, cat name for cats (dispatch made in evalCncDef above).
--- We cannot use linearization at this stage, since we do not know the
--- defaults we would need for question marks - and we're not yet in canon.
evalPrintname :: SourceGrammar -> Ident -> MPr -> Perh Term -> Err Term
evalPrintname gr c ppr lin =
case ppr of
Yes pr -> comp pr
_ -> case lin of
Yes t -> return $ K $ clean $ prt $ oneBranch t ---- stringFromTerm
_ -> return $ K $ prt c ----
where
comp = computeConcrete gr
-- pick one representative branch through abstractions, records,
-- tables and variants, keeping concatenation structure
oneBranch t = case t of
Abs _ b -> oneBranch b
R (r:_) -> oneBranch $ snd $ snd r
T _ (c:_) -> oneBranch $ snd c
V _ (c:_) -> oneBranch c
FV (t:_) -> oneBranch t
C x y -> C (oneBranch x) (oneBranch y)
S x _ -> oneBranch x
P x _ -> oneBranch x
Alts (d,_) -> oneBranch d
_ -> t
--- very unclean cleaner
-- strip "++ " separators and quote characters from the printed term
clean s = case s of
'+':'+':' ':cs -> clean cs
'"':cs -> clean cs
c:cs -> c: clean cs
_ -> s

View File

@@ -1,119 +0,0 @@
# RPM spec for the Grammatical Framework (GF) and its two Java editor GUIs.
# NOTE(review): Version is 3.0 while the newest changelog entry says
# 2.3pre -- confirm which is intended before release.
%define name GF
%define version 3.0
%define release 1
Name: %{name}
Summary: Grammatical Framework
Version: %{version}
Release: %{release}
License: GPL
Group: Sciences/Other
Vendor: The Language Technology Group
URL: http://www.cs.chalmers.se/~aarne/GF/
Source: GF-%{version}.tgz
BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-buildroot
BuildRequires: ghc
%description
The Grammatical Framework (=GF) is a grammar formalism based on type theory.
It consists of
* a special-purpose programming language
* a compiler of the language
* a generic grammar processor
The compiler reads GF grammars from user-provided files, and the
generic grammar processor performs various tasks with the grammars:
* generation
* parsing
* translation
* type checking
* computation
* paraphrasing
* random generation
* syntax editing
GF particularly addresses the following aspects of grammars:
* multilinguality (parallel grammars for different languages)
* semantics (semantic conditions of well-formedness, semantic
properties of expressions)
* grammar engineering (modularity, information hiding, reusable
libraries)
# Subpackage: the classic Java syntax editor GUI.
%package editor
Summary: Java syntax editor for Grammatical Framework (GF).
Group: Sciences/Other
Requires: %{name}
%description editor
This package contains the syntax editor GUI for GF.
# Subpackage: the enhanced editor GUI (printnames, HTML).
%package editor2
Summary: Java syntax editor for Grammatical Framework (GF).
Group: Sciences/Other
Requires: %{name}
%description editor2
This package contains the syntax editor GUI for GF with printname enhancements and HTML support.
# Unpack sources into a clean build root.
%prep
rm -rf $RPM_BUILD_ROOT
%setup -q
# Configure and build from the src subdirectory.
%build
cd src
%configure
make all
%install
cd src
%makeinstall
%clean
rm -rf $RPM_BUILD_ROOT
# File lists for the main package and the two editor subpackages.
%files
%defattr(-,root,root,0755)
%{_bindir}/gf
%{_bindir}/gfdoc
%doc LICENSE README doc/{DocGF.pdf,gf2-highlights.html,index.html}
%files editor
%defattr(-,root,root,0755)
%{_bindir}/jgf
%{_datadir}/%{name}-%{version}/gf-java.jar
%files editor2
%defattr(-,root,root,0755)
%{_bindir}/gfeditor
%{_datadir}/%{name}-%{version}/gfeditor.jar
%changelog
* Tue Jun 21 2005 Hans-Joachim Daniels <daniels@ira.uka.de> 2.3pre
- added the printnames and HTML enhanced editor as editor2
* Thu May 12 2005 Bjorn Bringert <bringert@cs.chalmers.se> 2.2pre2-1
- Split package into gf and gf-editor packages.
* Wed May 11 2005 Bjorn Bringert <bringert@cs.chalmers.se> 2.2pre1-1
- Release of GF 2.2
* Mon Nov 8 2004 Aarne Ranta <aarne@cs.chalmers.se> 2.1-1
- Release of GF 2.1
* Thu Jun 24 2004 Bjorn Bringert <bringert@cs.chalmers.se> 2.0-2
- Set ownership correctly.
- Move jar-file to share (thanks to Anders Carlsson for pointing this out.)
- Added vendor tag.
* Tue Jun 22 2004 Bjorn Bringert <bringert@cs.chalmers.se> 2.0-1
- Include gfdoc binary
* Mon Jun 21 2004 Bjorn Bringert <bringert@cs.chalmers.se> 2.0-1
- Initial packaging

View File

@@ -1,63 +0,0 @@
<?xml version="1.0"?>
<!-- WiX 2.x installer definition for GF on Windows: installs the gf and
     gfdoc binaries, the Java editor (jar + launcher .bat + GF_HOME
     environment variable), docs, and Start Menu shortcuts.
     @PACKAGE_VERSION@ is substituted by configure. -->
<Wix xmlns="http://schemas.microsoft.com/wix/2003/01/wi">
<Product Id="4717AF5D-52AC-4D13-85E6-D87278CE9BBC"
UpgradeCode="0BB7BB08-1A79-4981-A03F-32B401B01010"
Name="Grammatical Framework, version @PACKAGE_VERSION@"
Language="1033" Version="2.2" Manufacturer="The GF Developers">
<!-- "?" Package Id asks WiX to generate a fresh GUID per build. -->
<Package Id="????????-????-????-????-????????????"
Description="Grammatical Framework, version @PACKAGE_VERSION@"
Comments="This package contains the Grammatical Framework system, version @PACKAGE_VERSION@."
InstallerVersion="200" Compressed="yes" />
<Media Id="1" Cabinet="gf.cab" EmbedCab="yes" />
<Directory Id="TARGETDIR" Name="SourceDir">
<Directory Id="ProgramFilesFolder">
<Directory Id="INSTALLDIR" Name="GF-@PACKAGE_VERSION@">
<!-- the gf command-line binary plus its Start Menu shortcut -->
<Component Id="GFBinary" Guid="E2A44A6C-0252-4346-85AE-BC6A16BFB0FC" DiskId="1">
<File Id="GFEXE" Name="gf.exe" src="../bin/gf.exe" />
<Shortcut Id="GFStartMenu" Directory="GFProgramMenuDir"
Name="GF" Target="[!GFEXE]" />
</Component>
<Component Id="GFDocBinary" Guid="BDCA6F34-EE0A-4E72-8D00-CB7CAF3CEAEA" DiskId="1">
<File Id="GFDocEXE" Name="gfdoc.exe" src="tools/gfdoc.exe" />
</Component>
<!-- Java editor: sets GF_HOME for the launcher script -->
<Component Id="GFEditor" Guid="39F885F7-BC49-4CBC-9DCD-569C95AA3364" DiskId="1">
<Environment Id="GFHomeEnv" Name="GF_HOME" Action="create" Part="all"
Permanent="no" Value="[INSTALLDIR]" />
<File Id="GFEditorBat" Name="jgf.bat" src="jgf.bat" />
<File Id="GFEditorJar" Name="gf-java.jar" src="JavaGUI/gf-java.jar" />
<Shortcut Id="GFEditorStartMenu" Directory="GFProgramMenuDir"
Name="GFEditor" LongName="GF Editor" Target="[!GFEditorBat]"
WorkingDirectory="INSTALLDIR" />
</Component>
<Directory Id="GFDocDir" Name="doc">
<Component Id="GFDoc" Guid="23BEEBBF-F9AB-459F-B8D2-8414BB47834A" DiskId="1">
<File Id="GFReadme" Name="README.txt" src="../README" />
<File Id="GFLicenee" Name="LICENSE.txt" src="../LICENSE" />
</Component>
</Directory>
</Directory>
</Directory>
<Directory Id="ProgramMenuFolder" Name="PMenu" LongName="Programs">
<Directory Id="GFProgramMenuDir" Name='GF-@PACKAGE_VERSION@' />
</Directory>
</Directory>
<!-- single feature containing every component above -->
<Feature Id="ProductFeature" Title="Feature Title" Level="1">
<ComponentRef Id="GFBinary" />
<ComponentRef Id="GFDocBinary" />
<ComponentRef Id="GFEditor" />
<ComponentRef Id="GFDoc" />
</Feature>
</Product>
</Wix>

View File

@@ -1,98 +0,0 @@
# GF ATK configuration file
# ------------------------
# Configuration for the ATK (HTK-based) speech recognizer used by GF's
# speech input.  NOTE(review): parameter semantics below are ATK/HTK
# settings -- consult the ATK manual before tuning.
# -- Basic audio signal processing --
SOURCEFORMAT = HAUDIO
SOURCERATE = 625
# Set in GF/System/ATKSpeechInput.hs
# TARGETKIND = MFCC_0_D_A
TARGETRATE = 100000.0
WINDOWSIZE = 250000.0
ENORMALISE = F
ZMEANSOURCE = F
USEHAMMING = T
PREEMCOEF = 0.97
USEPOWER = T
NUMCHANS = 26
CEPLIFTER = 22
NUMCEPS = 12
SILFLOOR = 50.0
USESILDET = T
MEASURESIL = F
OUTSILWARN = T
# -- Silence detection ---
HPARM: CALWINDOW = 40
HPARM: SPEECHTHRESH = 9.0
HPARM: SILDISCARD = 10.0
HPARM: SILENERGY = 0.0
HPARM: SPCSEQCOUNT = 10
HPARM: SPCGLCHCOUNT = 0
HPARM: SILGLCHCOUNT = 2
HPARM: SILSEQCOUNT = 50
# -- Cepstral mean ---
HPARM: CMNTCONST = 0.995
HPARM: CMNRESETONSTOP = F
HPARM: CMNMINFRAMES = 12
# -- Recogniser --
AREC: TRBAKFREQ = 1
# hands free, don't return results until end
AREC: RUNMODE = 01441
AREC: GENBEAM = 200.0
AREC: WORDBEAM = 175.0
AREC: WORDPEN = -10.0
HNET: FORCECXTEXP = T
HNET: ALLOWXWRDEXP = F
HNET: MARKSUBLAT = F
ARMAN: AUTOSIL = F
HREC: CONFSCALE = 0.15
HREC: CONFOFFSET = 0.0
#HREC: CONFBGHMM = bghmm
# -- Set visibility and positions of ATK controls --
AIN: DISPSHOW = T
AIN: DISPXORIGIN = 440
AIN: DISPYORIGIN = 220
AIN: DISPHEIGHT = 40
AIN: DISPWIDTH = 160
ACODE: DISPSHOW = F
ACODE: DISPXORIGIN = 40
ACODE: DISPYORIGIN = 220
ACODE: DISPHEIGHT = 220
ACODE: DISPWIDTH = 380
ACODE: MAXFGFEATS = 13
ACODE: NUMSTREAMS = 1
AREC: DISPSHOW = T
AREC: DISPXORIGIN = 40
AREC: DISPYORIGIN = 20
AREC: DISPHEIGHT = 160
AREC: DISPWIDTH = 560
# -- Debugging (0 = tracing off) --
HMMSET: TRACE = 0
ADICT: TRACE = 0
AGRAM: TRACE = 0
GGRAM: TRACE = 0
AREC: TRACE = 0
ARMAN: TRACE = 0
HPARM: TRACE = 0
HNET: TRACE = 0
HREC: TRACE = 0

View File

@@ -1,30 +0,0 @@
#!/bin/sh
# Wrapper that locates the gf binary and runs it in batch mode,
# forwarding all command-line arguments.
# Search order: configured bindir, the directory containing this
# script, then $PATH.
prefix="@prefix@"
case "@host@" in
    *-cygwin)
        # the binary expects a Windows-style path on Cygwin
        prefix=`cygpath -w "$prefix"`;;
esac
exec_prefix="@exec_prefix@"
GF_BIN_DIR="@bindir@"
GF_DATA_DIR="@datadir@/GF-@PACKAGE_VERSION@"
GFBIN="$GF_BIN_DIR/gf"
if [ ! -x "${GFBIN}" ]; then
    # quote $0: the install path may contain spaces
    GF_BIN_DIR=`dirname "$0"`
    GFBIN="$GF_BIN_DIR/gf"
fi
if [ ! -x "${GFBIN}" ]; then
    GFBIN=`which gf`
fi
if [ ! -x "${GFBIN}" ]; then
    echo "gf not found." >&2
    exit 1
fi
exec "$GFBIN" --batch "$@"

View File

@@ -1,169 +0,0 @@
# checking that a file is haddocky:
# - checking if it has an export list
# - if there is no export list, it tries to find all defined functions
# - checking that all exported functions have type signatures
# - checking that the module header is OK
# changes on files:
# - transforming hard space to ordinary space
# limitations:
# - there might be some problems with nested comments
# - cannot handle type signatures for several functions
# (i.e. "a, b, c :: t")
# but on the other hand -- haddock has some problems with these too...
$operChar = qr/[\!\#\$\%\&\*\+\.\/\<\=\>\?\@\\\^\|\-\~]/;
$operCharColon = qr/[\!\#\$\%\&\*\+\.\/\<\=\>\?\@\\\^\|\-\~\:]/;
$nonOperChar = qr/[^\!\#\$\%\&\*\+\.\/\<\=\>\?\@\\\^\|\-\~]/;
$nonOperCharColon = qr/[^\!\#\$\%\&\*\+\.\/\<\=\>\?\@\\\^\|\-\~\:]/;
$operSym = qr/$operChar $operCharColon*/x;
$funSym = qr/[a-z] \w* \'*/x;
$funOrOper = qr/(?: $funSym | \($operSym\) )/x;
$keyword = qr/(?: type | data | module | newtype | infix[lr]? | import | instance | class )/x;
$keyOper = qr/^(?: \.\. | \:\:? | \= | \\ | \| | \<\- | \-\> | \@ | \~ | \=\> | \. )$/x;
sub check_headerline {
my ($title, $regexp) = @_;
if (s/^-- \s $title \s* : \s+ (.+?) \s*\n//sx) {
$name = $1;
push @ERR, "Incorrect ".lcfirst $title.": $name"
unless $name =~ $regexp;
return $&;
} else {
push @ERR, "Header missing: ".lcfirst $title."";
}
}
if ($#ARGV >= 0) {
@FILES = @ARGV;
} else {
# @dirs = qw{. api canonical cf cfgm compile for-ghc-nofud
# grammar infra notrace parsers shell
# source speech translate useGrammar util visualization
# GF GF/* GF/*/* GF/*/*/*};
@dirs = qw{GF GF/* GF/*/* GF/*/*/*};
@FILES = grep(!/\/(Par|Lex)(GF|GFC|CFG)\.hs$/,
glob "{".join(",",@dirs)."}/*.hs");
}
# Main loop: for each Haskell file, replace hard spaces in place, then
# check the module header, collect the export list (explicit or inferred
# from definitions), and report every exported function lacking a type
# signature.  Problems are accumulated in @ERR and printed per file.
for $file (@FILES) {
$file =~ s/\.hs//;
open F, "<$file.hs";
$_ = join "", <F>;
close F;
@ERR = ();
# substituting hard spaces for ordinary spaces
$nchars = tr/\240/ /;
if ($nchars > 0) {
push @ERR, "!! > Substituted $nchars hard spaces";
# This is the only change written back to disk.
open F, ">$file.hs";
print F $_;
close F;
}
# the module header
$hdr_module = $module = "";
s/^ \{-\# \s+ OPTIONS \s+ -cpp \s+ \#-\} //sx; # removing ghc options (cpp)
s/^ \s+ //sx; # removing initial whitespace
s/^ (--+ \s* \n) +//sx; # removing initial comment lines
unless (s/^ -- \s \| \s* \n//sx) {
push @ERR, "Incorrect module header";
} else {
$hdr_module = s/^-- \s Module \s* : \s+ (.+?) \s*\n//sx ? $1 : "";
&check_headerline("Maintainer", qr/^ [\wåäöÅÄÖüÜ\s\@\.]+ $/x);
&check_headerline("Stability", qr/.*/);
&check_headerline("Portability", qr/.*/);
s/^ (--+ \s* \n) +//sx;
push @ERR, "Missing CVS information"
unless s/^(-- \s+ \> \s+ CVS \s+ \$ .*? \$ \s* \n)+//sx;
s/^ (--+ \s* \n) +//sx;
push @ERR, "Missing module description"
unless /^ -- \s+ [^\(]/x;
}
# removing comments
s/\{- .*? -\}//gsx;
# Fixed: this previously interpolated $nonOperSymColon, a variable that
# is never defined anywhere in this script (so it expanded to the empty
# string); $nonOperCharColon is the intended negated operator class.
s/-- ($nonOperCharColon .*? \n | \n)/\n/gx;
# removing \n in front of whitespace (for simplification)
s/\n+[ \t]/ /gs;
# the export list
$exportlist = "";
if (/\n module \s+ ((?: \w | \.)+) \s+ \( (.*?) \) \s+ where/sx) {
($module, $exportlist) = ($1, $2);
$exportlist =~ s/\b module \s+ [A-Z] \w*//gsx;
$exportlist =~ s/\(\.\.\)//g;
} elsif (/\n module \s+ ((?: \w | \.)+) \s+ where/sx) {
$module = $1;
# modules without export lists
# push @ERR, "No export list";
# function definitions: every top-level "lhs = ..." counts as exported
while (/^ (.*? $nonOperCharColon) = (?! $operCharColon)/gmx) {
$defn = $1;
next if $defn =~ /^ $keyword \b/x;
if ($defn =~ /\` ($funSym) \`/x) {
$fn = $1;
} elsif ($defn =~ /(?<! $operCharColon) ($operSym)/x
&& $1 !~ $keyOper) {
$fn = "($1)";
} elsif ($defn =~ /^($funSym)/x) {
$fn = $1;
} else {
push @ERR, "!! > Error in function definition: $defn";
next;
}
$exportlist .= " $fn ";
}
} else {
push @ERR, "No module header found";
}
push @ERR, "Module names not matching: $module != $hdr_module"
if $hdr_module && $module !~ /\Q$hdr_module\E$/;
# fixing exportlist (double spaces as separator)
$exportlist = " $exportlist ";
$exportlist =~ s/(\s | \,)+/ /gx;
# removing functions with type signatures from export list
while (/^ ($funOrOper (\s* , \s* $funOrOper)*) \s* ::/gmx) {
$functionlist = $1;
while ($functionlist =~ s/^ ($funOrOper) (\s* , \s*)?//x) {
$function = $1;
$exportlist =~ s/\s \Q$function\E \s/ /gx;
}
}
# reporting exported functions without type signatures
$reported = 0;
$untyped = "";
while ($exportlist =~ /\s ($funOrOper) \s/x) {
$function = $1;
$exportlist =~ s/\s \Q$function\E \s/ /gx;
$reported++;
$untyped .= " $function";
}
push @ERR, "No type signature for $reported function(s):\n " . $untyped
if $reported;
print "-- $file\n > " . join("\n > ", @ERR) . "\n"
if @ERR;
}

View File

@@ -1,73 +0,0 @@
#!/bin/tcsh
######################################################################
# Author: Peter Ljunglöf
# Time-stamp: "2005-05-12, 23:17"
# CVS $Date: 2005/05/13 12:40:20 $
# CVS $Author: peb $
#
# a script for producing documentation through Haddock
######################################################################
# Directory layout: generated HTML ($docdir), CPP-sanitised copies of
# the sources ($tempdir), and static resources copied into the output.
set basedir = `pwd`
set docdir = haddock/html
set tempdir = haddock/.temp-files
set resourcedir = haddock/resources
set files = (`find GF -name '*.hs'` GF.hs)
######################################################################
echo 1. Creating and cleaning Haddock directory
echo -- $docdir
mkdir -p $docdir
rm -rf $docdir/*
######################################################################
echo
echo 2. Copying Haskell files to temporary directory: $tempdir
rm -rf $tempdir
foreach f ($files)
# echo -- $f
mkdir -p `dirname $tempdir/$f`
# Comment out CPP directives, which Haddock cannot parse.
perl -pe 's/^#/-- CPP #/' $f > $tempdir/$f
end
######################################################################
echo
echo 3. Invoking Haddock
cd $tempdir
haddock -o $basedir/$docdir -h -t 'Grammatical Framework' $files
cd $basedir
######################################################################
echo
echo 4. Restructuring to HTML framesets
echo -- Substituting for frame targets inside html files
# The generated index becomes the left-hand frame; links inside the
# pages are retargeted so they open in the "contents" frame instead.
mv $docdir/index.html $docdir/index-frame.html
foreach f ($docdir/*.html)
# echo -- $f
perl -pe 's/<HEAD/<HEAD><BASE TARGET="contents"/; s/"index.html"/"index-frame.html"/; s/(<A HREF = "\S*index\S*.html")/$1 TARGET="index"/' $f > .tempfile
mv .tempfile $f
end
echo -- Copying resource files:
echo -- `ls $resourcedir/*.*`
cp $resourcedir/*.* $docdir
######################################################################
echo
echo 5. Finished
echo -- The documentation is located at:
echo -- $docdir/index.html

View File

@@ -1,10 +0,0 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
<!-- Time-stamp: "2005-02-03, 15:59" -->
<HTML>
<HEAD>
<LINK HREF="haddock.css" REL=stylesheet>
</HEAD>
<BODY>
</BODY>
</HTML>

View File

@@ -1,14 +0,0 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Frameset//EN"
"http://www.w3.org/TR/html4/frameset.dtd">
<!-- Time-stamp: "2005-02-03, 15:53" -->
<html>
<head>
<meta http-equiv="content-type" content="text/html; charset=ISO-8859-1" />
<title>Grammatical Framework programmer's documentation</title>
</head>
<frameset cols="1*,2*">
<frame name="index" src="index-frame.html">
<frame name="contents" src="blank.html">
</frameset>
</html>

View File

@@ -1,334 +0,0 @@
<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML//EN">
<html><head><title>GF Version 2.9</title></head>
<body bgcolor="#ffffff" text="#000000">
<center>
<img src="gf-logo.gif">
<h1>Grammatical Framework</h1>
<h2>Version 2.9</h2>
December 21, 2007.
<p>
</center>
<hr>
<a href=
"doc">Documentation</a>
|
<a href=
"download/">Download</a>
|
<a href=
"doc/darcs.html">LatestCode</a>
|
<a href=
"doc/gf-quickstart.html">QuickStart</a>
|
<a href=
"doc/gf-tutorial.html">Tutorial</a>
|
<a href=
"doc/gf-refman.html">ReferenceManual</a>
|
<a href="lib/resource/doc/synopsis.html">Libraries</a>
|
<a href=
"http://www.cs.chalmers.se/~bringert/gf/translate/">NumeralDemo</a>
|
<a href=
"http://www.cs.chalmers.se/~markus/gramlets/letter-applet.html">LetterDemo</a>
<hr>
<p>
</p><h2>News</h2>
<i>June 25, 2008</i>.
<a href="doc/gf3-release.html">GF 3.0</a>
coming soon! Version 2.9f is now frozen and no longer
available in darcs. But <a href="download/GF-2.9f.tgz">here</a> is a tarball
with the final version of 2.9 sources.
<p>
<i>March 20, 2008</i>. Ten years of GF!
<ul>
<li> <a href="doc/nancy-slides.pdf">The first public talk</a> at INRIA Nancy,
20 March 1998.
<li> <a href="doc/GF-0.1.tgz">GF Version 0.1</a> source code from XRCE Grenoble
18 March 1998 (Requires the
<a href="http://www.cs.chalmers.se/~augustss/hbc/hbc.html">HBC Haskell Compiler</a>
in "no-pedantic" mode).
</ul>
<p>
<i>December 21, 2007</i>.
<ul>
<li> GF 2.9 is mainly a bug fix version;
<li> preview version of GF3: get the
<a href=
"http://www.cs.chalmers.se/Cs/Research/Language-technology/darcs/GF/doc/darcs.html">
latest sources</a> and compile with <tt>make gfc</tt>
<li> new version of the <a href="doc/gf-tutorial.html">tutorial</a>
<li> new <a href="doc/gf-refman.html">reference manual</a>
<li> <a href="demos/resource-api/editor.html">resource api browser</a>
using interactive editing
</ul>
<p>
<a href="doc/old-news.html">News before 2.9</a>.
</p><h2>What is GF?</h2>
The Grammatical Framework (=GF) is a grammar formalism based on type
theory. It consists of
<ul>
<li> a special-purpose programming language
</li><li> a compiler of the language
</li><li> a generic grammar processor
</li></ul>
The compiler reads
GF grammars from user-provided files,
and the generic grammar processor performs
various tasks with the grammars:
<ul>
<li> generation
</li><li> parsing
</li><li> translation
</li><li> type checking
</li><li> computation
</li><li> paraphrasing
</li><li> random and exhaustive generation
</li><li> syntax editing
</li></ul>
GF particularly addresses four aspects of grammars:
<ul>
<li> multilinguality (parallel grammars for different languages)
</li><li> semantics (semantic conditions of well-formedness, semantic
properties of expressions)
<li> modularity and grammar engineering
<li> reuse of grammars in different formats and as software components
</ul>
GF provides an easy way to experiment with grammars written in
different formats, including the ubiquitous BNF and EBNF formats.
The <a href="doc/gf-compiler.png">GF compilation chart</a> gives a
summary of the supported input and output formats (the nodes in ellipses).
<br>
For instance, if you want to create a finite-state automaton
in the HTK SLF format (to use for speech recognition), all you have to do
is to write an EBNF grammar in a file <tt>foo.ebnf</tt> and type
<pre>
echo "pg -printer=slf" | gf foo.ebnf
</pre>
<h2>License</h2>
GF is open-source software licensed under
<a href="LICENSE">GNU General Public License (GPL)</a>.
<p>
The <a href="lib">GF Grammar Libraries</a> are licensed under
<a href="lib/resource/LICENSE">GNU Lesser General Public License (LGPL)</a>.
<h2>Examples and demos</h2>
<a href="http://www.cs.chalmers.se/~bringert/gf/translate/">Numeral
translator</a>: recognizes and generates
numbers from 1 to 999,999 in 80 languages.
(The link goes to a live applet, which requires
<a href="http://java.sun.com/j2se/1.5.0/download.jsp">Java 1.5 plugin</a>.
Here is an <a href="doc/2341.html">example</a>, which does
not require the plugin.)
<p>
<a href="http://www.cs.chalmers.se/%7Ekrijo/gramlets/letter-applet.html">Letter
editor</a>:
write simple letters in English, Finnish,
French, Swedish, and Russian with a few mouse clicks.
<p>
<a
href="http://www.cs.chalmers.se/~bringert/misc/tramdemo.avi">Demo film</a>
of a multimodal dialogue system built with embedded grammars.
<p>
<a href="examples/tutorial/">Example grammars</a> used in the
<a href="doc/gf-tutorial.html">GF tutorial</a>.
<p>
<a href="lib/resource/doc/index.html">Resource grammar library</a>:
basic structures of ten languages
(Danish, English, Finnish, French, German,
Italian, Norwegian, Russian, Spanish, Swedish).
Resource grammars can be used as libraries for writing GF
applications,
but they can also be useful for language training.
<h2>Executable programs</h2>
GF is available for
several platforms: Linux, Mac OS X, Microsoft Windows, and Sun OS.
To get GF, go to the
<a href="download">Download Page</a>.
<h2>Quick start</h2>
When you have downloaded and installed GF, you can try one of the
<a href="doc/gf-quickstart.html">quick start examples</a>.
<h2>Source code</h2>
The main part of GF is written in
<a href="http://www.haskell.org/">Haskell</a>.
<p>
The platform-independent graphical user interface is written in
<a href="http://java.sun.com/">Java</a>.
</p><p>
The <a href="download/">Download Page</a>
gives links to source and binary packages, as well as
information on compiler requirements.
<p>
The publicly accessible
<a href="http://www.cs.chalmers.se/Cs/Research/Language-technology/darcs/GF/doc/darcs.html">
Darcs repository</a>
has the latest sources and documents.
<p>
For Java programmers: GF grammars can be embedded in Java programs by using the
<a href="http://www.cs.chalmers.se/~bringert/gf/gf-java.html">
Embedded GF Interpreter</a>.
</p><h2>Documents</h2>
See the <a href="doc/index.html">Documentation page</a>.
<h2>Projects and events</h2>
<ul>
<li> <a href="http://webalt.math.helsinki.fi/content/index_eng.html">WebALT</a>,
Web Advanced Learning Technologies. GF is used as for generating multilingual
teaching material in mathematics.
<li> <a href="http://www.talk-project.org">TALK</a> = Tools for Ambient Linguistic
Knowledge. GF was used in implementing multimodal and multilingual dialogue systems.
<li> <a href="http://www.key-project.org/">KeY</a> project on Integrated Deductive
Software Design. GF was used for
authoring informal and formal specifications. More details on the GF
application
<a href="http://www.cs.chalmers.se/%7Ekrijo/gfspec">
here</a>.
<li>
Project <a href="http://efficient.citi.tudor.lu/index_noframe.html">Efficient</a>
at Tudor Institute, Luxembourg, "atelier de prototypage de transactions d'e-commerce".
GF is used as an authoring tool for business models.
<h2>Miscellaneous</h2>
</li><li>
<a href="doc/gfcc.pdf">
GFCC</a>:
report on a compiler from a fragment of C to JVM, written in GF.
The compiler source code can be found in the directory
<tt>examples/gfcc</tt> in the GF grammar library
(see <a href="http://sourceforge.net/project/showfiles.php?group_id=132285">GF download page</a>).
</li><li>
The original <a href="http://www.xrce.xerox.com/">
GF Xerox Home Page</a>
with the oldest releases of and documents on GF, up to Version 0.54, 1999,
does not seem to exist any more.
</li><li>
Earlier application:
<a href="http://www.cs.chalmers.se/%7Ehallgren/Alfa/Tutorial/GFplugin.html">
Natural-Language Interface to the proof editor Alfa</a>.
</li><li>
<a href="http://www.cs.chalmers.se/%7Emarkus/BNFC">The BNF Converter</a>.
A GF spin-off customized for the description of programming
languages.
</li><li>
<a href="http://www.cs.chalmers.se/%7Emarkus/FM">The Functional
Morphology project</a>. Creating infrastructure for GF and other
linguistic applications.
</li></ul>
<h2>Authors</h2>
The <a href="http://www.cs.chalmers.se/Cs/Research/Language-technology/">
Language Technology Group</a>.
More details on the
<a href="http://www.cs.chalmers.se/%7Eaarne/GF/doc/gf-people.html">
Authors and Acknowledgements</a> page.
<h2>Implementation project</h2>
Want to become a GF developer? Contact
<a href="http://www.cs.chalmers.se/%7Eaarne/">Aarne Ranta</a>.
Or just get the sources and start hacking.
<p>
And register to the
<a href="https://lists.sourceforge.net/lists/listinfo/gf-tools-users">GF User's Mailing List</a>!
<hr>
Last modified by
<a href="http://www.cs.chalmers.se/%7Eaarne">
Aarne Ranta</a>,
December 21, 2007.
</body></html>

View File

@@ -1,251 +0,0 @@
#!/bin/sh
#
# install - install a program, script, or datafile
# This comes from X11R5 (mit/util/scripts/install.sh).
#
# Copyright 1991 by the Massachusetts Institute of Technology
#
# Permission to use, copy, modify, distribute, and sell this software and its
# documentation for any purpose is hereby granted without fee, provided that
# the above copyright notice appear in all copies and that both that
# copyright notice and this permission notice appear in supporting
# documentation, and that the name of M.I.T. not be used in advertising or
# publicity pertaining to distribution of the software without specific,
# written prior permission. M.I.T. makes no representations about the
# suitability of this software for any purpose. It is provided "as is"
# without express or implied warranty.
#
# Calling this script install-sh is preferred over install.sh, to prevent
# `make' implicit rules from creating a file called install from it
# when there is no Makefile.
#
# This script is compatible with the BSD install script, but was written
# from scratch. It can only install one file at a time, a restriction
# shared with many OS's install programs.
# set DOITPROG to echo to test this script
# Don't use :- since 4.3BSD and earlier shells don't like it.
doit="${DOITPROG-}"
# put in absolute paths if you don't have them in your path; or use env. vars.
mvprog="${MVPROG-mv}"
cpprog="${CPPROG-cp}"
chmodprog="${CHMODPROG-chmod}"
chownprog="${CHOWNPROG-chown}"
chgrpprog="${CHGRPPROG-chgrp}"
stripprog="${STRIPPROG-strip}"
rmprog="${RMPROG-rm}"
mkdirprog="${MKDIRPROG-mkdir}"
transformbasename=""
# Fixed: this was initialised under the name "transform_arg", but the
# option parser (-t=*) and the rename step below read "transformarg".
transformarg=""
instcmd="$mvprog"
chmodcmd="$chmodprog 0755"
chowncmd=""
chgrpcmd=""
stripcmd=""
rmcmd="$rmprog -f"
mvcmd="$mvprog"
src=""
dst=""
dir_arg=""
# Parse the command line.  Each option branch consumes its flag (and any
# argument) with shift; the two bare words collect src, then dst.
while [ x"$1" != x ]; do
case $1 in
-c) instcmd="$cpprog"
shift
continue;;
-d) dir_arg=true
shift
continue;;
-m) chmodcmd="$chmodprog $2"
shift
shift
continue;;
-o) chowncmd="$chownprog $2"
shift
shift
continue;;
-g) chgrpcmd="$chgrpprog $2"
shift
shift
continue;;
-s) stripcmd="$stripprog"
shift
continue;;
-t=*) transformarg=`echo $1 | sed 's/-t=//'`
shift
continue;;
-b=*) transformbasename=`echo $1 | sed 's/-b=//'`
shift
continue;;
*) if [ x"$src" = x ]
then
src=$1
else
# this colon is to work around a 386BSD /bin/sh bug
:
dst=$1
fi
shift
continue;;
esac
done
# Sanity checks: src is mandatory.  In -d mode src names a directory to
# create; otherwise src must exist, dst is mandatory, and when dst is a
# directory the source basename is appended to it.
if [ x"$src" = x ]
then
echo "install: no input file specified"
exit 1
else
true
fi
if [ x"$dir_arg" != x ]; then
dst=$src
src=""
if [ -d $dst ]; then
# Directory already exists: nothing to create or chmod.
instcmd=:
chmodcmd=""
else
instcmd=mkdir
fi
else
# Waiting for this to be detected by the "$instcmd $src $dsttmp" command
# might cause directories to be created, which would be especially bad
# if $src (and thus $dsttmp) contains '*'.
if [ -f $src -o -d $src ]
then
true
else
echo "install: $src does not exist"
exit 1
fi
if [ x"$dst" = x ]
then
echo "install: no destination specified"
exit 1
else
true
fi
# If destination is a directory, append the input filename; if your system
# does not like double slashes in filenames, you may need to add some logic
if [ -d $dst ]
then
dst="$dst"/`basename $src`
else
true
fi
fi
## this sed command emulates the dirname command
dstdir=`echo $dst | sed -e 's,[^/]*$,,;s,/$,,;s,^$,.,'`
# Make sure that the destination directory exists.
# this part is taken from Noah Friedman's mkinstalldirs script
# Skip lots of stat calls in the usual case.
if [ ! -d "$dstdir" ]; then
defaultIFS='
'
IFS="${IFS-${defaultIFS}}"
oIFS="${IFS}"
# Some sh's can't handle IFS=/ for some reason.
IFS='%'
# Slashes were mapped to '%' by the sed below, so splitting on '%'
# yields the individual path components.
set - `echo ${dstdir} | sed -e 's@/@%@g' -e 's@^%@/@'`
IFS="${oIFS}"
pathcomp=''
# Rebuild the path one component at a time, creating what is missing.
while [ $# -ne 0 ] ; do
pathcomp="${pathcomp}${1}"
shift
if [ ! -d "${pathcomp}" ] ;
then
$mkdirprog "${pathcomp}"
else
true
fi
pathcomp="${pathcomp}/"
done
fi
# Perform the installation.  Directory mode (-d) just creates the
# directory and applies ownership/mode; file mode installs into a temp
# file inside the target directory and renames it into place, so the
# visible update is a single rename.
if [ x"$dir_arg" != x ]
then
$doit $instcmd $dst &&
if [ x"$chowncmd" != x ]; then $doit $chowncmd $dst; else true ; fi &&
if [ x"$chgrpcmd" != x ]; then $doit $chgrpcmd $dst; else true ; fi &&
if [ x"$stripcmd" != x ]; then $doit $stripcmd $dst; else true ; fi &&
if [ x"$chmodcmd" != x ]; then $doit $chmodcmd $dst; else true ; fi
else
# If we're going to rename the final executable, determine the name now.
if [ x"$transformarg" = x ]
then
dstfile=`basename $dst`
else
dstfile=`basename $dst $transformbasename |
sed $transformarg`$transformbasename
fi
# don't allow the sed command to completely eliminate the filename
if [ x"$dstfile" = x ]
then
dstfile=`basename $dst`
else
true
fi
# Make a temp file name in the proper directory.
dsttmp=$dstdir/#inst.$$#
# Move or copy the file name to the temp name
$doit $instcmd $src $dsttmp &&
trap "rm -f ${dsttmp}" 0 &&
# and set any options; do chmod last to preserve setuid bits
# If any of these fail, we abort the whole thing. If we want to
# ignore errors from any of these, just make sure not to ignore
# errors from the above "$doit $instcmd $src $dsttmp" command.
if [ x"$chowncmd" != x ]; then $doit $chowncmd $dsttmp; else true;fi &&
if [ x"$chgrpcmd" != x ]; then $doit $chgrpcmd $dsttmp; else true;fi &&
if [ x"$stripcmd" != x ]; then $doit $stripcmd $dsttmp; else true;fi &&
if [ x"$chmodcmd" != x ]; then $doit $chmodcmd $dsttmp; else true;fi &&
# Now rename the file to the real destination.
$doit $rmcmd -f $dstdir/$dstfile &&
$doit $mvcmd $dsttmp $dstdir/$dstfile
fi &&
exit 0

View File

@@ -1,8 +0,0 @@
#! /bin/sh
# Launch the Java GUI editor (GFEditor2) wrapped around a gf subprocess.
# Requires GFHOME to point at the root of the GF installation.
JGUILIB=$GFHOME/src/JavaGUI
GF=$GFHOME/bin/gf
JGUI=GFEditor2
# NOTE(review): "$GF -java $*" joins all script arguments into a single
# string handed to the GUI as one argument -- presumably intentional,
# but confirm behaviour when arguments contain spaces.
java -cp $JGUILIB $JGUI "$GF -java $*"

View File

@@ -1,165 +0,0 @@
GNU LESSER GENERAL PUBLIC LICENSE
Version 3, 29 June 2007
Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
This version of the GNU Lesser General Public License incorporates
the terms and conditions of version 3 of the GNU General Public
License, supplemented by the additional permissions listed below.
0. Additional Definitions.
As used herein, "this License" refers to version 3 of the GNU Lesser
General Public License, and the "GNU GPL" refers to version 3 of the GNU
General Public License.
"The Library" refers to a covered work governed by this License,
other than an Application or a Combined Work as defined below.
An "Application" is any work that makes use of an interface provided
by the Library, but which is not otherwise based on the Library.
Defining a subclass of a class defined by the Library is deemed a mode
of using an interface provided by the Library.
A "Combined Work" is a work produced by combining or linking an
Application with the Library. The particular version of the Library
with which the Combined Work was made is also called the "Linked
Version".
The "Minimal Corresponding Source" for a Combined Work means the
Corresponding Source for the Combined Work, excluding any source code
for portions of the Combined Work that, considered in isolation, are
based on the Application, and not on the Linked Version.
The "Corresponding Application Code" for a Combined Work means the
object code and/or source code for the Application, including any data
and utility programs needed for reproducing the Combined Work from the
Application, but excluding the System Libraries of the Combined Work.
1. Exception to Section 3 of the GNU GPL.
You may convey a covered work under sections 3 and 4 of this License
without being bound by section 3 of the GNU GPL.
2. Conveying Modified Versions.
If you modify a copy of the Library, and, in your modifications, a
facility refers to a function or data to be supplied by an Application
that uses the facility (other than as an argument passed when the
facility is invoked), then you may convey a copy of the modified
version:
a) under this License, provided that you make a good faith effort to
ensure that, in the event an Application does not supply the
function or data, the facility still operates, and performs
whatever part of its purpose remains meaningful, or
b) under the GNU GPL, with none of the additional permissions of
this License applicable to that copy.
3. Object Code Incorporating Material from Library Header Files.
The object code form of an Application may incorporate material from
a header file that is part of the Library. You may convey such object
code under terms of your choice, provided that, if the incorporated
material is not limited to numerical parameters, data structure
layouts and accessors, or small macros, inline functions and templates
(ten or fewer lines in length), you do both of the following:
a) Give prominent notice with each copy of the object code that the
Library is used in it and that the Library and its use are
covered by this License.
b) Accompany the object code with a copy of the GNU GPL and this license
document.
4. Combined Works.
You may convey a Combined Work under terms of your choice that,
taken together, effectively do not restrict modification of the
portions of the Library contained in the Combined Work and reverse
engineering for debugging such modifications, if you also do each of
the following:
a) Give prominent notice with each copy of the Combined Work that
the Library is used in it and that the Library and its use are
covered by this License.
b) Accompany the Combined Work with a copy of the GNU GPL and this license
document.
c) For a Combined Work that displays copyright notices during
execution, include the copyright notice for the Library among
these notices, as well as a reference directing the user to the
copies of the GNU GPL and this license document.
d) Do one of the following:
0) Convey the Minimal Corresponding Source under the terms of this
License, and the Corresponding Application Code in a form
suitable for, and under terms that permit, the user to
recombine or relink the Application with a modified version of
the Linked Version to produce a modified Combined Work, in the
manner specified by section 6 of the GNU GPL for conveying
Corresponding Source.
1) Use a suitable shared library mechanism for linking with the
Library. A suitable mechanism is one that (a) uses at run time
a copy of the Library already present on the user's computer
system, and (b) will operate properly with a modified version
of the Library that is interface-compatible with the Linked
Version.
e) Provide Installation Information, but only if you would otherwise
be required to provide such information under section 6 of the
GNU GPL, and only to the extent that such information is
necessary to install and execute a modified version of the
Combined Work produced by recombining or relinking the
Application with a modified version of the Linked Version. (If
you use option 4d0, the Installation Information must accompany
the Minimal Corresponding Source and Corresponding Application
Code. If you use option 4d1, you must provide the Installation
Information in the manner specified by section 6 of the GNU GPL
for conveying Corresponding Source.)
5. Combined Libraries.
You may place library facilities that are a work based on the
Library side by side in a single library together with other library
facilities that are not Applications and are not covered by this
License, and convey such a combined library under terms of your
choice, if you do both of the following:
a) Accompany the combined library with a copy of the same work based
on the Library, uncombined with any other library facilities,
conveyed under the terms of this License.
b) Give prominent notice with the combined library that part of it
is a work based on the Library, and explaining where to find the
accompanying uncombined form of the same work.
6. Revised Versions of the GNU Lesser General Public License.
The Free Software Foundation may publish revised and/or new versions
of the GNU Lesser General Public License from time to time. Such new
versions will be similar in spirit to the present version, but may
differ in detail to address new problems or concerns.
Each version is given a distinguishing version number. If the
Library as you received it specifies that a certain numbered version
of the GNU Lesser General Public License "or any later version"
applies to it, you have the option of following the terms and
conditions either of that published version or of any later version
published by the Free Software Foundation. If the Library as you
received it does not specify a version number of the GNU Lesser
General Public License, you may choose any version of the GNU Lesser
General Public License ever published by the Free Software Foundation.
If the Library as you received it specifies that a proxy can decide
whether future versions of the GNU Lesser General Public License shall
apply, that proxy's public statement of acceptance of any version is
permanent authorization for you to choose that version for the
Library.

View File

@@ -1,60 +0,0 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
<HTML>
<HEAD>
<META NAME="generator" CONTENT="http://txt2tags.sf.net">
<TITLE>GF Grammar Libraries</TITLE>
</HEAD><BODY BGCOLOR="white" TEXT="black">
<P ALIGN="center"><CENTER><H1>GF Grammar Libraries</H1>
<FONT SIZE="4">
<I>Author: Aarne Ranta &lt;aarne (at) cs.chalmers.se&gt;</I><BR>
Last update: Fri Dec 22 15:19:46 2006
</FONT></CENTER>
<P>
One of the main ideas of
<A HREF="..">GF</A>
is the use of libraries in grammar writing, in a way familiar
from software engineering. In this way, large grammars can
be built in cooperation, and old grammars or parts of them
can be reused in new grammars. The slides
<A HREF="resource-1.0/doc/gslt-sem-2006.html">Grammars as Software Libraries</A>
give some introduction to this idea.
</P>
<H2>The resource grammar library</H2>
<P>
This library covers basic linguistic structures of
different languages.
</P>
<P>
<A HREF="resource-1.0/doc">Version 1.1</A> released 22 December 2006
(enhanced version of 1.0).
Covers Danish, English, Finnish, French, German, Italian, Norwegian,
Russian, Spanish, and Swedish.
</P>
<P>
Two older versions are also available:
<A HREF="resource/">Version 0.9</A>
and
<A HREF="resource-0.6/">Version 0.6</A>.
</P>
<H2>The prelude library</H2>
<P>
The <A HREF="prelude/">prelude</A>
library gives utility functions for different GF applications:
</P>
<P>
<A HREF="resource-1.0/doc/gfdoc/Precedence.html">Precedence</A>. Utilities for
formal languages: precedence levels, associatives, infixes.
</P>
<P>
<A HREF="resource-1.0/doc/gfdoc/Predef.html">Predef</A>. Type signatures
of predefined (hard-coded) functions.
</P>
<P>
<A HREF="resource-1.0/doc/gfdoc/Prelude.html">Prelude</A>. Generic utilities
for strings, tables, records, booleans.
</P>
<!-- html code generated by txt2tags 2.3 (http://txt2tags.sf.net) -->
<!-- cmdline: txt2tags index.txt -->
</BODY></HTML>

View File

@@ -1,58 +0,0 @@
GF Grammar Libraries
Author: Aarne Ranta <aarne (at) cs.chalmers.se>
Last update: %%date(%c)
% NOTE: this is a txt2tags file.
% Create an html file from this file using:
% txt2tags --toc -thtml index.txt
%!target:html
One of the main ideas of
[GF ..]
is the use of libraries in grammar writing, in a way familiar
from software engineering. In this way, large grammars can
be built in cooperation, and old grammars or parts of them
can be reused in new grammars. The slides
[Grammars as Software Libraries resource-1.0/doc/gslt-sem-2006.html]
give some introduction to this idea.
==The resource grammar library==
This library covers basic linguistic structures of
different languages.
[Version 1.2 resource-1.0/doc] released 22 December 2006
(enhanced version of 1.0).
Covers Danish, English, Finnish, French, German, Italian, Norwegian,
Russian, Spanish, and Swedish, and to a smaller extent Arabic and Catalan.
Two older versions are also available:
[Version 0.9 resource/]
and
[Version 0.6 resource-0.6/].
==The prelude library==
The [prelude prelude/]
library gives utility functions for different GF applications:
[Precedence resource-1.0/doc/gfdoc/Precedence.html]. Utilities for
formal languages: precedence levels, associatives, infixes.
[Predef resource-1.0/doc/gfdoc/Predef.html]. Type signatures
of predefined (hard-coded) functions.
[Prelude resource-1.0/doc/gfdoc/Prelude.html]. Generic utilities
for strings, tables, records, booleans.
==License==
All libraries in this directory and its subdirectories are
released under GNU Lesser General Public License (LGPL). See the file
[LICENSE ./LICENSE] for more details.

View File

@@ -1,129 +0,0 @@
-- Coordination utilities: build "X, ..., Y conj Z" phrases incrementally.
-- A list under construction keeps two fields: s1 = the elements collected
-- so far, already joined by separators, and s2 = the last element, so that
-- the conjunction word can later be inserted between s1 and s2.
resource Coordination = open Prelude in {
param
-- Size distinction exported for grammars that treat two-element lists specially.
ListSize = TwoElem | ManyElem ;
oper
-- A plain-string list in progress: s1 = joined init, s2 = last element.
ListX = {s1,s2 : Str} ;
-- Start a list from its first two elements.
twoStr : (x,y : Str) -> ListX = \x,y ->
{s1 = x ; s2 = y} ;
-- Append one more element, moving the previous last element into s1
-- behind the separator (typically a comma).
consStr : Str -> ListX -> Str -> ListX = \comma,xs,x ->
{s1 = xs.s1 ++ comma ++ xs.s2 ; s2 = x } ;
-- The same two operations for {s : Str} records.
twoSS : (_,_ : SS) -> ListX = \x,y ->
twoStr x.s y.s ;
consSS : Str -> ListX -> SS -> ListX = \comma,xs,x ->
consStr comma xs x.s ;
-- A conjunction is a single word ("and"); a distributed conjunction has
-- two parts placed around the list ("both - and").
Conjunction : Type = SS ;
ConjunctionDistr : Type = {s1 : Str ; s2 : Str} ;
-- Close a list by inserting the conjunction before the last element.
conjunctX : Conjunction -> ListX -> Str = \or,xs ->
xs.s1 ++ or.s ++ xs.s2 ;
conjunctDistrX : ConjunctionDistr -> ListX -> Str = \or,xs ->
or.s1 ++ xs.s1 ++ or.s2 ++ xs.s2 ;
-- As above, but returning {s : Str} records.
conjunctSS : Conjunction -> ListX -> SS = \or,xs ->
ss (xs.s1 ++ or.s ++ xs.s2) ;
conjunctDistrSS : ConjunctionDistr -> ListX -> SS = \or,xs ->
ss (or.s1 ++ xs.s1 ++ or.s2 ++ xs.s2) ;
-- all this lifted to tables
ListTable : Type -> Type = \P -> {s1,s2 : P => Str} ;
twoTable : (P : Type) -> (_,_ : {s : P => Str}) -> ListTable P = \_,x,y ->
{s1 = x.s ; s2 = y.s} ;
consTable : (P : Type) -> Str -> ListTable P -> {s : P => Str} -> ListTable P =
\P,c,xs,x ->
{s1 = table P {o => xs.s1 ! o ++ c ++ xs.s2 ! o} ; s2 = x.s} ;
conjunctTable : (P : Type) -> Conjunction -> ListTable P -> {s : P => Str} =
\P,or,xs ->
{s = table P {p => xs.s1 ! p ++ or.s ++ xs.s2 ! p}} ;
conjunctDistrTable :
(P : Type) -> ConjunctionDistr -> ListTable P -> {s : P => Str} = \P,or,xs ->
{s = table P {p => or.s1++ xs.s1 ! p ++ or.s2 ++ xs.s2 ! p}} ;
-- ... and to two- and three-argument tables: how clumsy! ---
ListTable2 : Type -> Type -> Type = \P,Q ->
{s1,s2 : P => Q => Str} ;
twoTable2 : (P,Q : Type) -> (_,_ : {s : P => Q => Str}) -> ListTable2 P Q =
\_,_,x,y ->
{s1 = x.s ; s2 = y.s} ;
consTable2 :
(P,Q : Type) -> Str -> ListTable2 P Q -> {s : P => Q => Str} -> ListTable2 P Q =
\P,Q,c,xs,x ->
{s1 = table P {p => table Q {q => xs.s1 ! p ! q ++ c ++ xs.s2 ! p! q}} ;
s2 = x.s
} ;
conjunctTable2 :
(P,Q : Type) -> Conjunction -> ListTable2 P Q -> {s : P => Q => Str} =
\P,Q,or,xs ->
{s = table P {p => table Q {q => xs.s1 ! p ! q ++ or.s ++ xs.s2 ! p ! q}}} ;
conjunctDistrTable2 :
(P,Q : Type) -> ConjunctionDistr -> ListTable2 P Q -> {s : P => Q => Str} =
\P,Q,or,xs ->
{s =
table P {p => table Q {q => or.s1++ xs.s1 ! p ! q ++ or.s2 ++ xs.s2 ! p ! q}}} ;
ListTable3 : Type -> Type -> Type -> Type = \P,Q,R ->
{s1,s2 : P => Q => R => Str} ;
twoTable3 : (P,Q,R : Type) -> (_,_ : {s : P => Q => R => Str}) ->
ListTable3 P Q R =
\_,_,_,x,y ->
{s1 = x.s ; s2 = y.s} ;
consTable3 :
(P,Q,R : Type) -> Str -> ListTable3 P Q R -> {s : P => Q => R => Str} ->
ListTable3 P Q R =
\P,Q,R,c,xs,x ->
{s1 = \\p,q,r => xs.s1 ! p ! q ! r ++ c ++ xs.s2 ! p ! q ! r ;
s2 = x.s
} ;
conjunctTable3 :
(P,Q,R : Type) -> Conjunction -> ListTable3 P Q R -> {s : P => Q => R => Str} =
\P,Q,R,or,xs ->
{s = \\p,q,r => xs.s1 ! p ! q ! r ++ or.s ++ xs.s2 ! p ! q ! r} ;
conjunctDistrTable3 :
(P,Q,R : Type) -> ConjunctionDistr -> ListTable3 P Q R ->
{s : P => Q => R => Str} =
\P,Q,R,or,xs ->
{s = \\p,q,r => or.s1++ xs.s1 ! p ! q ! r ++ or.s2 ++ xs.s2 ! p ! q ! r} ;
-- The default separator token.
comma = "," ;
-- you can also do this to right-associative lists:
-- (here s1 grows at the front; s2 remains the final element)
consrStr : Str -> Str -> ListX -> ListX = \comma,x,xs ->
{s1 = x ++ comma ++ xs.s1 ; s2 = xs.s2 } ;
consrSS : Str -> SS -> ListX -> ListX = \comma,x,xs ->
consrStr comma x.s xs ;
consrTable : (P : Type) -> Str -> {s : P => Str} -> ListTable P -> ListTable P =
\P,c,x,xs ->
{s1 = table P {o => x.s ! o ++ c ++ xs.s1 ! o} ; s2 = xs.s2} ;
consrTable2 : (P,Q : Type) -> Str -> {s : P => Q => Str} ->
ListTable2 P Q -> ListTable2 P Q =
\P,Q,c,x,xs ->
{s1 = table P {p => table Q {q => x.s ! p ! q ++ c ++ xs.s1 ! p ! q}} ;
s2 = xs.s2
} ;
} ;

View File

@@ -1,54 +0,0 @@
-- Precedence-carrying terms for formal (programming-language-like) notation.
resource Formal = open Prelude in {
-- to replace the old library Precedence
oper
Prec : PType ;
-- A term together with the precedence level of its top-most operator.
TermPrec : Type = {s : Str ; p : Prec} ;
mkPrec : Prec -> Str -> TermPrec = \p,s ->
{s = s ; p = p} ;
-- Render a complete term; level 0 never forces parentheses.
top : TermPrec -> Str = usePrec 0 ;
-- Atomic terms get the highest level and are never parenthesized.
constant : Str -> TermPrec = mkPrec highest ;
-- Left-, right-, and non-associative infix operators at level p:
-- the argument on the associating side may stay at level p, the
-- other side must be strictly higher.
infixl : Prec -> Str -> (_,_ : TermPrec) -> TermPrec = \p,f,x,y ->
mkPrec p (usePrec p x ++ f ++ usePrec (nextPrec p) y) ;
infixr : Prec -> Str -> (_,_ : TermPrec) -> TermPrec = \p,f,x,y ->
mkPrec p (usePrec (nextPrec p) x ++ f ++ usePrec p y) ;
infixn : Prec -> Str -> (_,_ : TermPrec) -> TermPrec = \p,f,x,y ->
mkPrec p (usePrec (nextPrec p) x ++ f ++ usePrec (nextPrec p) y) ;
-- auxiliaries, should not be needed so much
-- Parenthesize when the term's own level is lower than required;
-- otherwise parentheses remain optional (free variants).
usePrec : Prec -> TermPrec -> Str = \p,x ->
case lessPrec x.p p of {
True => parenth x.s ;
False => parenthOpt x.s
} ;
parenth : Str -> Str = \s -> "(" ++ s ++ ")" ;
parenthOpt : Str -> Str = \s -> variants {s ; "(" ++ s ++ ")"} ;
--.
-- low-level things: don't use
-- Five levels 0..4, encoded as a bounded integer type.
Prec : PType = Predef.Ints 4 ;
highest = 4 ;
-- lessPrec p q = True iff p < q, spelled out case by case.
lessPrec : Prec -> Prec -> Bool = \p,q ->
case <<p,q> : Prec * Prec> of {
<3,4> | <2,3> | <2,4> => True ;
<1,1> | <1,0> | <0,0> => False ;
<1,_> | <0,_> => True ;
_ => False
} ;
-- Successor on levels, capped at the top level 4.
nextPrec : Prec -> Prec = \p -> case <p : Prec> of {
4 => 4 ;
n => Predef.plus n 1
} ;
}

View File

@@ -1,8 +0,0 @@
-- Helpers for generating HTML markup.
-- NB: + glues tokens together at compile time, ++ separates tokens.
resource HTML = open Prelude in {
oper
-- tag "b" = "<b>" ; endtag "b" = "</b>"
tag : Str -> Str = \t -> "<" + t + ">" ;
endtag : Str -> Str = \t -> tag ("/" + t) ;
-- Wrap s in <t> ... </t>.
intag : Str -> Str -> Str = \t,s -> tag t ++ s ++ endtag t ;
-- Wrap s in <t a> ... </t>, with attribute text a inside the start tag.
intagAttr : Str -> Str -> Str -> Str =
\t,a,s -> ("<" + t) ++ (a + ">") ++ s ++ endtag t ;
}

View File

@@ -1,12 +0,0 @@
-- Helpers for generating LaTeX code.
-- NB: + glues tokens together at compile time, ++ separates tokens.
resource Latex = open Prelude in {
oper
-- command "alpha" = "\alpha"
command : Str -> Str = \c -> "\\" + c ;
-- One- and two-argument macros: \f{x} and \f{x}{y}.
fun1 : Str -> Str -> Str = \f,x -> "\\" + f + "{" ++ x ++ "}" ;
fun2 : Str -> Str -> Str -> Str =
\f,x,y -> "\\" + f + "{" ++ x ++ "}{" ++ y ++ "}" ;
begin : Str -> Str = \e -> "\\begin{" + e + "}" ;
end : Str -> Str = \e -> "\\end{" + e + "}" ;
-- Wrap s in \begin{e} ... \end{e}.
inEnv : Str -> Str -> Str = \e,s -> begin e ++ s ++ end e ;
}

View File

@@ -1,117 +0,0 @@
-- operations for precedence-dependent strings.
-- five levels:
-- p4 (constants), p3 (applications), p2 (products), p1 (sums), p0 (arrows)
-- Two encodings are provided: a table from required precedence to string
-- (PrecTerm), and — after the "alternative" mark below — precedence as an
-- inherent feature of the term (TermWithPrec).
resource Precedence = open Prelude in {
param
Prec = p4 | p3 | p2 | p1 | p0 ;
oper
-- A term as a table: for each required precedence, the rendering
-- (parenthesized or not) appropriate at that position.
PrecTerm = Prec => Str ;
oper
-- Pack a PrecTerm into a record, for use as a linearization type.
pss : PrecTerm -> {s : PrecTerm} = \s -> {s = s} ;
-- change this if you want some other type of parentheses
mkParenth : Str -> Str = \str -> "(" ++ str ++ ")" ;
-- define ordering of precedences
nextPrec : Prec => Prec =
table {p0 => p1 ; p1 => p2 ; p2 => p3 ; _ => p4} ;
prevPrec : Prec => Prec =
table {p4 => p3 ; p3 => p2 ; p2 => p1 ; _ => p0} ;
-- mkPrec str ! own ! required: parenthesize str when its own
-- precedence is below the required one.
mkPrec : Str -> Prec => Prec => Str = \str ->
table {
p4 => table { -- use the term of precedence p4...
_ => str} ; -- ...always without parentheses
p3 => table { -- use the term of precedence p3...
p4 => mkParenth str ; -- ...in parentheses if p4 is required...
_ => str} ; -- ...otherwise without parentheses
p2 => table {
p4 => mkParenth str ;
p3 => mkParenth str ;
_ => str} ;
p1 => table {
p1 => str ;
p0 => str ;
_ => mkParenth str} ;
p0 => table {
p0 => str ;
_ => mkParenth str}
} ;
-- make a string into a constant, of precedence p4
mkConst : Str -> PrecTerm =
\f ->
mkPrec f ! p4 ;
-- make a string into a 1/2/3 -place prefix operator, of precedence p3
mkFun1 : Str -> PrecTerm -> PrecTerm =
\f -> \x ->
table {k => mkPrec (f ++ x ! p4) ! p3 ! k} ;
mkFun2 : Str -> PrecTerm -> PrecTerm -> PrecTerm =
\f -> \x -> \y ->
table {k => mkPrec (f ++ x ! p4 ++ y ! p4) ! p3 ! k} ;
mkFun3 : Str -> PrecTerm -> PrecTerm -> PrecTerm -> PrecTerm =
\f -> \x -> \y -> \z ->
table {k => mkPrec (f ++ x ! p4 ++ y ! p4 ++ z ! p4) ! p3 ! k} ;
-- make a string into a non/left/right -associative infix operator, of precedence p
mkInfix : Str -> Prec -> PrecTerm -> PrecTerm -> PrecTerm =
\f -> \p -> \x -> \y ->
table {k => mkPrec (x ! (nextPrec ! p) ++ f ++ y ! (nextPrec ! p)) ! p ! k} ;
mkInfixL : Str -> Prec -> PrecTerm -> PrecTerm -> PrecTerm =
\f -> \p -> \x -> \y ->
table {k => mkPrec (x ! p ++ f ++ y ! (nextPrec ! p)) ! p ! k} ;
mkInfixR : Str -> Prec -> PrecTerm -> PrecTerm -> PrecTerm =
\f -> \p -> \x -> \y ->
table {k => mkPrec (x ! (nextPrec ! p) ++ f ++ y ! p) ! p ! k} ;
-----------------------------------------------------------
-- alternative:
-- precedence as inherent feature
oper TermWithPrec = {s : Str ; p : Prec} ;
oper
mkpPrec : Str -> Prec -> TermWithPrec =
\f -> \p ->
{s = f ; p = p} ;
-- Render a term at a required precedence, via the mkPrec table above.
usePrec : TermWithPrec -> Prec -> Str =
\x -> \p ->
mkPrec x.s ! x.p ! p ;
-- make a string into a constant, of precedence p4
mkpConst : Str -> TermWithPrec =
\f ->
mkpPrec f p4 ;
-- make a string into a 1/2/3 -place prefix operator, of precedence p3
mkpFun1 : Str -> TermWithPrec -> TermWithPrec =
\f -> \x ->
mkpPrec (f ++ usePrec x p4) p3 ;
mkpFun2 : Str -> TermWithPrec -> TermWithPrec -> TermWithPrec =
\f -> \x -> \y ->
mkpPrec (f ++ usePrec x p4 ++ usePrec y p4) p3 ;
mkpFun3 : Str -> TermWithPrec -> TermWithPrec -> TermWithPrec -> TermWithPrec =
\f -> \x -> \y -> \z ->
mkpPrec (f ++ usePrec x p4 ++ usePrec y p4 ++ usePrec z p4) p3 ;
-- make a string a into non/left/right -associative infix operator, of precedence p
mkpInfix : Str -> Prec -> TermWithPrec -> TermWithPrec -> TermWithPrec =
\f -> \p -> \x -> \y ->
mkpPrec (usePrec x (nextPrec ! p) ++ f ++ usePrec y (nextPrec ! p)) p ;
mkpInfixL : Str -> Prec -> TermWithPrec -> TermWithPrec -> TermWithPrec =
\f -> \p -> \x -> \y ->
mkpPrec (usePrec x p ++ f ++ usePrec y (nextPrec ! p)) p ;
mkpInfixR : Str -> Prec -> TermWithPrec -> TermWithPrec -> TermWithPrec =
\f -> \p -> \x -> \y ->
mkpPrec (usePrec x (nextPrec ! p) ++ f ++ usePrec y p) p ;
} ;

View File

@@ -1,37 +0,0 @@
--1 Predefined functions for concrete syntax
-- The definitions of these constants are hard-coded in GF, and defined
-- in [AppPredefined.hs ../src/GF/Grammar/AppPredefined.hs]. Applying
-- them to run-time variables leads to compiler errors that are often
-- only detected at the code generation time.
resource Predef = {
-- This type of booleans is for internal use only.
param PBool = PTrue | PFalse ;
-- The bodies 'variants {}' below are placeholders: the real meaning of
-- each operation is supplied by the compiler; only the type signature
-- is significant here.
oper Error : Type = variants {} ; -- the empty type
oper Int : Type = variants {} ; -- the type of integers
oper Ints : Int -> Type = variants {} ; -- the type of integers from 0 to n
oper error : Str -> Error = variants {} ; -- forms error message
oper length : Tok -> Int = variants {} ; -- length of string
oper drop : Int -> Tok -> Tok = variants {} ; -- drop prefix of length
oper take : Int -> Tok -> Tok = variants {} ; -- take prefix of length
oper tk : Int -> Tok -> Tok = variants {} ; -- drop suffix of length
oper dp : Int -> Tok -> Tok = variants {} ; -- take suffix of length
oper eqInt : Int -> Int -> PBool = variants {} ; -- test if equal integers
oper lessInt: Int -> Int -> PBool = variants {} ; -- test order of integers
oper plus : Int -> Int -> Int = variants {} ; -- add integers
oper eqStr : Tok -> Tok -> PBool = variants {} ; -- test if equal strings
oper occur : Tok -> Tok -> PBool = variants {} ; -- test if occurs as substring
oper occurs : Tok -> Tok -> PBool = variants {} ; -- test if any char occurs
oper show : (P : Type) -> P -> Tok = variants {} ; -- convert param to string
oper read : (P : Type) -> Tok -> P = variants {} ; -- convert string to param
oper toStr : (L : Type) -> L -> Str = variants {} ; -- find the "first" string
oper mapStr : (L : Type) -> (Str -> Str) -> L -> L = variants {} ;
-- map all strings in a data structure; experimental ---
} ;

View File

@@ -1,4 +0,0 @@
-- Abstract syntax of the built-in literal categories, predefined in GF.
abstract PredefAbs = {
cat Int ; String ; Float ;
} ;

View File

@@ -1,4 +0,0 @@
-- Concrete syntax of the built-in literal categories: plain strings.
concrete PredefCnc of PredefAbs = {
lincat
Int, Float, String = {s : Str} ;
} ;

View File

@@ -1,142 +0,0 @@
--1 The GF Prelude
-- This file defines some prelude facilities usable in all grammars.
resource Prelude = open (Predef=Predef) in {
oper
--2 Strings, records, and tables
-- The basic record type {s : Str} and its constructors/concatenators.
SS : Type = {s : Str} ;
ss : Str -> SS = \s -> {s = s} ;
ss2 : (_,_ : Str) -> SS = \x,y -> ss (x ++ y) ;
ss3 : (_,_ ,_: Str) -> SS = \x,y,z -> ss (x ++ y ++ z) ;
cc2 : (_,_ : SS) -> SS = \x,y -> ss (x.s ++ y.s) ;
cc3 : (_,_,_ : SS) -> SS = \x,y,z -> ss (x.s ++ y.s ++ z.s) ;
-- Records whose s field is a table over a parameter type.
SS1 : Type -> Type = \P -> {s : P => Str} ;
ss1 : (A : Type) -> Str -> SS1 A = \A,s -> {s = table {_ => s}} ;
-- A string with an inherent feature.
SP1 : Type -> Type = \P -> {s : Str ; p : P} ;
sp1 : (A : Type) -> Str -> A -> SP1 A = \_,s,a -> {s = s ; p = a} ;
constTable : (A,B : Type) -> B -> A => B = \_,_,b -> \\_ => b ;
constStr : (A : Type) -> Str -> A => Str = \A -> constTable A Str ;
-- Discontinuous constituents.
SD2 : Type = {s1,s2 : Str} ;
sd2 : (_,_ : Str) -> SD2 = \x,y -> {s1 = x ; s2 = y} ;
--2 Optional elements
-- Missing form.
nonExist : Str = variants {} ;
-- Optional string with preference on the string vs. empty.
optStr : Str -> Str = \s -> variants {s ; []} ;
strOpt : Str -> Str = \s -> variants {[] ; s} ;
-- Free order between two strings.
bothWays : Str -> Str -> Str = \x,y -> variants {x ++ y ; y ++ x} ;
-- Parametric order between two strings.
preOrPost : Bool -> Str -> Str -> Str = \pr,x,y ->
if_then_Str pr (x ++ y) (y ++ x) ;
--2 Infixes, prefixes, and postfixes
-- Fixes with precedences are defined in [Precedence Precedence.html].
infixSS : Str -> SS -> SS -> SS = \f,x,y -> ss (x.s ++ f ++ y.s) ;
prefixSS : Str -> SS -> SS = \f,x -> ss (f ++ x.s) ;
postfixSS : Str -> SS -> SS = \f,x -> ss (x.s ++ f) ;
embedSS : Str -> Str -> SS -> SS = \f,g,x -> ss (f ++ x.s ++ g) ;
--2 Booleans
param Bool = True | False ;
oper
if_then_else : (A : Type) -> Bool -> A -> A -> A = \_,c,d,e ->
case c of {
True => d ; ---- should not need to qualify
False => e
} ;
andB : (_,_ : Bool) -> Bool = \a,b -> if_then_else Bool a b False ;
orB : (_,_ : Bool) -> Bool = \a,b -> if_then_else Bool a True b ;
notB : Bool -> Bool = \a -> if_then_else Bool a False True ;
if_then_Str : Bool -> Str -> Str -> Str = if_then_else Str ;
-- The string when the condition holds, otherwise the missing form.
onlyIf : Bool -> Str -> Str = \b,s -> case b of {
True => s ;
_ => nonExist
} ;
-- Interface to internal booleans
pbool2bool : Predef.PBool -> Bool = \b -> case b of {
Predef.PFalse => False ; Predef.PTrue => True
} ;
-- Drop / take the last character of a token.
init : Tok -> Tok = Predef.tk 1 ;
last : Tok -> Tok = Predef.dp 1 ;
--2 High-level access to Predef operations
isNil : Tok -> Bool = \b -> pbool2bool (Predef.eqStr [] b) ;
ifTok : (A : Type) -> Tok -> Tok -> A -> A -> A = \A,t,u,a,b ->
case Predef.eqStr t u of {Predef.PTrue => a ; Predef.PFalse => b} ;
--2 Lexer-related operations
-- Bind together two tokens in some lexers, either obligatorily or optionally
oper
glue : Str -> Str -> Str = \x,y -> x ++ BIND ++ y ;
glueOpt : Str -> Str -> Str = \x,y -> variants {glue x y ; x ++ y} ;
noglueOpt : Str -> Str -> Str = \x,y -> variants {x ++ y ; glue x y} ;
-- Force capitalization of next word in some unlexers
capitalize : Str -> Str = \s -> CAPIT ++ s ;
-- These should be hidden, and never changed since they are hardcoded in (un)lexers
BIND : Str = "&+" ;
PARA : Str = "&-" ;
CAPIT : Str = "&|" ;
--2 Miscellaneous
-- Identity function
id : (A : Type) -> A -> A = \_,a -> a ;
-- Parentheses
paren : Str -> Str = \s -> "(" ++ s ++ ")" ;
parenss : SS -> SS = \s -> ss (paren s.s) ;
-- Zero, one, two, or more (elements in a list etc)
param
ENumber = E0 | E1 | E2 | Emore ;
oper
-- Successor on ENumber, saturating at Emore.
eNext : ENumber -> ENumber = \e -> case e of {
E0 => E1 ; E1 => E2 ; _ => Emore} ;
}

View File

@@ -1,193 +0,0 @@
module Main where
import Control.Monad
import Data.Maybe
import System.Cmd
import System.Directory
import System.Environment
import System.Exit
-- Make commands for compiling and testing resource grammars.
-- usage: runghc Make ((present? OPT?) | (clone FILE))? LANGS?
-- where
-- - OPT = (lang | api | math | pgf | test | demo | clean)
-- - LANGS has the form e.g. langs=Eng,Fin,Rus
-- - clone with a flag file=FILENAME clones the file to the specified languages,
-- by replacing the 3-letter language name of the original in both
-- the filename and the body
-- with each name in the list (default: all languages)
-- With no argument, lang and api are done, in this order.
-- See 'make' below for what is done by which command.
-- Path to the locally built gf binary used for all compilations.
default_gf = "../../dist/build/gf/gf"
-- Extra search path passed when compiling the api in "present" mode.
presApiPath = "-path=api:present"
-- the languages have long directory names and short ISO codes (3 letters)
-- we also give the decodings for postprocessing linearizations, as long as grammars
-- don't support all flags needed; they are used in tests
langsCoding = [
(("arabic", "Ara"),""),
(("bulgarian","Bul"),""),
(("catalan", "Cat"),""),
(("danish", "Dan"),""),
(("english", "Eng"),""),
(("finnish", "Fin"),""),
(("french", "Fre"),""),
(("hindi", "Hin"),"to_devanagari"),
(("german", "Ger"),""),
(("interlingua","Ina"),""),
(("italian", "Ita"),""),
(("norwegian","Nor"),""),
(("russian", "Rus"),""),
(("spanish", "Spa"),""),
(("swedish", "Swe"),""),
(("thai", "Tha"),"to_thai")
]
-- All (directory, code) pairs, without the unlexer decodings.
langs = map fst langsCoding
-- languages for which to compile Lang
langsLang = langs `except` ["Ara"]
-- languages for which to compile Try
langsAPI = langsLang `except` ["Ara","Bul","Hin","Ina","Rus","Tha"]
-- languages for which to compile Mathematical
langsMath = langsAPI
-- languages for which to run treebank test
langsTest = langsLang `except` ["Ara","Bul","Cat","Hin","Rus","Spa","Tha"]
-- languages for which to run demo test
langsDemo = langsLang `except` ["Ara","Hin","Ina","Tha"]
-- languages for which langs.pgf is built
langsPGF = langsTest `only` ["Eng","Fre","Swe"]
-- languages for which Compatibility exists (to be extended)
langsCompat = langsLang `only` ["Cat","Eng","Fin","Fre","Ita","Spa","Swe"]
-- Input and output files of the treebank regression test.
treebankExx = "exx-resource.gft"
treebankResults = "exx-resource.gftb"
-- Entry point: forward all command-line arguments to 'make'.
main :: IO ()
main = getArgs >>= make
-- Run the requested build steps.
-- 'ifx opt act' runs 'act' when 'opt' was given OR when no arguments were
-- given at all (the default build); 'ifxx opt act' runs it only when 'opt'
-- was given explicitly. 'optl' narrows a language list via a langs= flag.
make :: [String] -> IO ()
make xx = do
let ifx opt act = if null xx || elem opt xx then act >> return () else return ()
let ifxx opt act = if elem opt xx then act >> return () else return ()
let pres = elem "present" xx
-- Output directory depends on whether this is a present-only build.
let dir = if pres then "../present" else "../alltenses"
let optl ls = maybe ls id $ getOptLangs xx
ifx "lang" $ do
mapM_ (gfc pres [] . lang) (optl langsLang)
copy "*/*.gfo" dir
ifx "compat" $ do
mapM_ (gfc pres [] . compat) (optl langsCompat)
copy "*/Compatibility*.gfo" dir
ifx "api" $ do
mapM_ (gfc pres presApiPath . try) (optl langsAPI)
copy "*/*.gfo" dir
ifx "math" $ do
mapM_ (gfc False [] . math) (optl langsMath)
copy "mathematical/*.gfo" "../mathematical"
mapM_ (gfc False [] . symbolic) (optl langsMath)
copy "mathematical/Symbolic*.gfo" "../mathematical"
-- Build the multilingual langs.pgf from the compiled .gfo files.
ifxx "pgf" $ do
run_gfc $ ["-s","--make","--name=langs","--parser=off",
"--output-dir=" ++ dir]
++ [dir ++ "/Lang" ++ la ++ ".gfo" | (_,la) <- optl langsPGF]
ifxx "test" $ do
let ls = optl langsTest
gf (treeb "Lang" ls) $ unwords [dir ++ "/Lang" ++ la ++ ".gfo" | (_,la) <- ls]
ifxx "demo" $ do
let ls = optl langsDemo
gf (demos "Demo" ls) $ unwords ["demo/Demo" ++ la ++ ".gf" | (_,la) <- ls]
ifxx "clean" $ do
system "rm -f */*.gfo ../alltenses/*.gfo ../present/*.gfo"
-- clone: copy one source file to every selected language, replacing
-- the 3-letter code both in the file name and in the file body.
ifxx "clone" $ do
let (pref,lang) = case getLangName xx of
Just pl -> pl
_ -> error "expected flag option file=ppppppLLL.gf"
s <- readFile (pref ++ lang ++ ".gf")
mapM_ (\la -> writeFile (pref ++ la ++ ".gf") (replaceLang lang la s)) (map snd (optl langs))
return ()
-- Compile one GF source file in batch mode.
-- pres: preprocess with ./mkPresent (present-tense-only build);
-- ppath: extra -path flag, or "" for none (empty flags are filtered
-- out again by run_gfc).
gfc pres ppath file = do
let preproc = if pres then "-preproc=./mkPresent" else ""
let path = if pres then ppath else ""
putStrLn $ "Compiling " ++ file
run_gfc ["-s","-src", preproc, path, file]
-- Pipe a command string into an interactive gf session reading 'file',
-- via the shell.
gf comm file = do
putStrLn $ "Reading " ++ file
let cmd = "echo \"" ++ comm ++ "\" | gf -s " ++ file
putStrLn cmd
system cmd
-- gf command string for the treebank regression test: read trees from
-- the treebank file, linearize them in all languages, save the output.
treeb abstr ls = "rf -lines -tree -file=" ++ treebankExx ++
" | l -treebank " ++ unlexer abstr ls ++ " | wf -file=" ++ treebankResults
-- gf command string for the demo: generate 100 random trees and render
-- their linearizations as an HTML page.
demos abstr ls = "gr -number=100 | l -treebank " ++ unlexer abstr ls ++
" | ps -to_html | wf -file=resdemo.html"
-- Source-file paths for one language, given its (directory, code) pair.
lang (dir, code) = concat [dir, "/All", code, ".gf"]
compat (dir, code) = concat [dir, "/Compatibility", code, ".gf"]
try (_, code) = concat ["api/Try", code, ".gf"]
math (_, code) = concat ["mathematical/Mathematical", code, ".gf"]
symbolic (_, code) = concat ["mathematical/Symbolic", code, ".gf"]
-- Keep the pairs whose second component is absent from / present in 'es'.
except xs es = [ x | x@(_, code) <- xs, code `notElem` es ]
only xs es = [ x | x@(_, code) <- xs, code `elem` es ]
-- list of languages overriding the definitions above
-- Parses a "langs=Eng,Fin,..." argument; Nothing when no such flag was
-- given. Unknown codes are silently dropped: only pairs found in 'langs'
-- survive.
getOptLangs args = case [ls | a <- args, let (f,ls) = splitAt 6 a, f=="langs="] of
ls:_ -> return $ findLangs $ seps ls
_ -> Nothing
where
-- Turn the comma-separated list into words.
seps = words . map (\c -> if c==',' then ' ' else c)
findLangs ls = [lang | lang@(_,la) <- langs, elem la ls]
-- the file name has the form p....pLLL.gf, i.e. 3-letter lang name, suffix .gf
-- Returns Just (prefix, language code) for the first "file=..." argument,
-- Nothing when no such flag is present.
getLangName args =
  listToMaybe [ split v | a <- args, let (key, v) = splitAt 5 a, key == "file=" ]
  where
    split v =
      let stem = take (length v - 3) v                 -- strip the ".gf" suffix
          (prefix, code) = splitAt (length stem - 3) stem
      in (prefix, code)
-- Replace every occurrence of the substring 's1' by 's2'.
-- Fix/generalization: the original hard-coded the match length to 3
-- (its own comment said "length s1"), so it only worked for 3-letter
-- language codes. Using 'length s1' keeps behavior identical for the
-- 3-letter codes used elsewhere in this file, and also handles codes
-- of other lengths. An empty 's1' is never matched (guards against
-- non-termination).
replaceLang s1 s2 = repl where
  repl s = case s of
    c:cs | not (null s1) && take lgs s == s1 -> s2 ++ repl (drop lgs s)
         | otherwise -> c : repl cs
    _ -> s
  lgs = length s1
-- Build the shell-escaped -unlexer flag for the given languages, e.g.
-- -unlexer=\"LangHin=to_devanagari ...\". Languages whose decoding in
-- langsCoding is empty are omitted.
unlexer abstr ls =
"-unlexer=\\\"" ++ unwords
[abstr ++ la ++ "=" ++ unl |
lla@(_,la) <- ls, let unl = unlex lla, not (null unl)] ++
"\\\""
where
-- Decoding for one language; "" when the pair is not in langsCoding.
unlex lla = maybe "" id $ lookup lla langsCoding
-- | Runs the gf executable in compile mode with the given arguments.
run_gfc :: [String] -> IO ()
run_gfc args =
do let args' = ["-batch","-gf-lib-path=../"] ++ filter (not . null) args ++ ["+RTS"] ++ rts_flags ++ ["-RTS"]
putStrLn $ "Running: " ++ default_gf ++ " " ++ unwords (map showArg args')
e <- rawSystem default_gf args'
case e of
ExitSuccess -> return ()
-- A failed compilation is reported but does not abort the whole run.
ExitFailure i -> putStrLn $ "gf exited with exit code: " ++ show i
where rts_flags = ["-K100M"] -- enlarged stack for compiling big grammars
showArg arg = "'" ++ arg ++ "'"
-- Shell out to 'cp', discarding the exit code.
copy :: String -> String -> IO ()
copy src dst = do
  _ <- system ("cp " ++ src ++ " " ++ dst)
  return ()

View File

@@ -1,49 +0,0 @@
# Build automation for the GF resource grammar library.
# Most targets just forward to the Haskell driver Make.hs.
RUNGHC=runghc
RUNMAKE=$(RUNGHC) Make.hs
.PHONY: all present alltenses lang api math test demo synopsis link compiled clean
all: link present alltenses math
# Present-tense-only builds.
present:
$(RUNMAKE) present lang
$(RUNMAKE) present api
# Builds with all tenses.
alltenses:
$(RUNMAKE) lang
$(RUNMAKE) api
lang:
$(RUNMAKE) lang
api:
$(RUNMAKE) api
math:
$(RUNMAKE) math
# Backward-compatibility modules are compiled directly with gf.
compat:
gf -batch */Compatibility*.gf
cp -p */Compatibility*.gfo ../alltenses/
cp -p */Compatibility*.gfo ../present/
# $(RUNMAKE) present compat
# $(RUNMAKE) compat
test:
$(RUNMAKE) test
demo:
$(RUNMAKE) demo
# Regenerate the library synopsis documentation.
synopsis:
cd doc ; $(RUNGHC) MkSynopsis ; cd ..
link:
chmod a+x mkPresent
# Tarball of the compiled library for distribution.
compiled:
(cd .. && tar -zcf resource-compiled.tar.gz prelude alltenses present mathematical)
clean:
$(RUNMAKE) clean

View File

@@ -1,30 +0,0 @@
--1 Adjective: Adjectives and Adjectival Phrases
abstract Adjective = Cat ** {
fun
-- The principal ways of forming an adjectival phrase are
-- positive, comparative, relational, reflexive-relational, and
-- elliptic-relational.
-- (The superlative use is covered in [Noun Noun.html].$SuperlA$.)
PositA : A -> AP ; -- warm
ComparA : A -> NP -> AP ; -- warmer than I
ComplA2 : A2 -> NP -> AP ; -- married to her
ReflA2 : A2 -> AP ; -- married to itself
UseA2 : A2 -> A ; -- married (complement left out)
-- Sentence and question complements defined for all adjectival
-- phrases, although the semantics is only clear for some adjectives.
SentAP : AP -> SC -> AP ; -- good that she is here
-- An adjectival phrase can be modified by an *adadjective*, such as "very".
AdAP : AdA -> AP -> AP ; -- very warm
-- The formation of adverbs from adjectives (e.g. "quickly") is covered
-- in [Adverb Adverb.html].
}

View File

@@ -1,32 +0,0 @@
--1 Adverb: Adverbs and Adverbial Phrases
abstract Adverb = Cat ** {
fun
-- The two main ways of forming adverbs are from adjectives and by
-- prepositions from noun phrases.
PositAdvAdj : A -> Adv ; -- warmly
PrepNP : Prep -> NP -> Adv ; -- in the house
-- Comparative adverbs have a noun phrase or a sentence as object of
-- comparison.
ComparAdvAdj : CAdv -> A -> NP -> Adv ; -- more warmly than John
ComparAdvAdjS : CAdv -> A -> S -> Adv ; -- more warmly than he runs
-- Adverbs can be modified by 'adadjectives', just like adjectives.
AdAdv : AdA -> Adv -> Adv ; -- very quickly
-- Subordinate clauses can function as adverbs.
SubjS : Subj -> S -> Adv ; -- when she sleeps
-- Comparison adverbs also work as numeral adverbs.
AdnCAdv : CAdv -> AdN ; -- less (than five)
}

View File

@@ -1,63 +0,0 @@
--1 Obsolete constructs included for backward-compatibility
-- Each group below records the module a construct was removed from and
-- the date of removal.
abstract Backward = Cat ** {
-- from Cat
cat
Slash ;
fun
-- from Verb 19/4/2008
ComplV2 : V2 -> NP -> VP ; -- use it
ComplV3 : V3 -> NP -> NP -> VP ; -- send a message to her
ComplV2V : V2V -> NP -> VP -> VP ; -- cause it to burn
ComplV2S : V2S -> NP -> S -> VP ; -- tell me that it rains
ComplV2Q : V2Q -> NP -> QS -> VP ; -- ask me who came
ComplV2A : V2A -> NP -> AP -> VP ; -- paint it red
ReflV2 : V2 -> VP ; -- use itself
UseVQ : VQ -> V2 ; -- ask (a question)
UseVS : VS -> V2 ; -- know (a secret)
-- from Sentence 19/4/2008
SlashV2 : NP -> V2 -> Slash ; -- (whom) he sees
SlashVVV2 : NP -> VV -> V2 -> Slash; -- (whom) he wants to see
-- from Noun 19/4/2008
NumInt : Int -> Num ; -- 51
OrdInt : Int -> Ord ; -- 51st (DEPRECATED)
NoOrd : Ord ;
-- 20/4
DetSg : Quant -> Ord -> Det ; -- the best man
DetPl : Quant -> Num -> Ord -> Det ; -- the five best men
NoNum : Num ;
-- 22/4
DefArt : Quant ; -- the (house), the (houses)
IndefArt : Quant ; -- a (house), (houses)
MassDet : Quant ; -- (beer)
-- from Structural 19/4/2008
that_NP : NP ;
these_NP : NP ;
this_NP : NP ;
those_NP : NP ;
whichPl_IDet : IDet ;
whichSg_IDet : IDet ;
-- from Adverb
AdvSC : SC -> Adv ; -- that he arrives ---- REMOVE?
}

View File

@@ -1,134 +0,0 @@
--1 Cat: the Category System
-- The category system is central to the library in the sense
-- that the other modules ($Adjective$, $Adverb$, $Noun$, $Verb$ etc)
-- communicate through it. This means that a e.g. a function using
-- $NP$s in $Verb$ need not know how $NP$s are constructed in $Noun$:
-- it is enough that both $Verb$ and $Noun$ use the same type $NP$,
-- which is given here in $Cat$.
--
-- Some categories are inherited from [``Common`` Common.html].
-- The reason they are defined there is that they have the same
-- implementation in all languages in the resource (typically,
-- just a string). These categories are
-- $AdA, AdN, AdV, Adv, Ant, CAdv, IAdv, PConj, Phr$,
-- $Pol, SC, Tense, Text, Utt, Voc$.
--
-- Moreover, the list categories $ListAdv, ListAP, ListNP, ListS$
-- are defined on $Conjunction$ and only used locally there.
--
-- (Lines beginning with '---b' are commented-out alternatives kept
-- for reference.)
abstract Cat = Common ** {
cat
--2 Sentences and clauses
-- Constructed in [Sentence Sentence.html], and also in
-- [Idiom Idiom.html].
S ; -- declarative sentence e.g. "she lived here"
QS ; -- question e.g. "where did she live"
RS ; -- relative e.g. "in which she lived"
Cl ; -- declarative clause, with all tenses e.g. "she looks at this"
ClSlash;-- clause missing NP (S/NP in GPSG) e.g. "she looks at"
SSlash ;-- sentence missing NP e.g. "she has looked at"
Imp ; -- imperative e.g. "look at this"
--2 Questions and interrogatives
-- Constructed in [Question Question.html].
QCl ; -- question clause, with all tenses e.g. "why does she walk"
IP ; -- interrogative pronoun e.g. "who"
IComp ; -- interrogative complement of copula e.g. "where"
IDet ; -- interrogative determiner e.g. "how many"
IQuant; -- interrogative quantifier e.g. "which"
--2 Relative clauses and pronouns
-- Constructed in [Relative Relative.html].
RCl ; -- relative clause, with all tenses e.g. "in which she lives"
RP ; -- relative pronoun e.g. "in which"
--2 Verb phrases
-- Constructed in [Verb Verb.html].
VP ; -- verb phrase e.g. "is very warm"
Comp ; -- complement of copula, such as AP e.g. "very warm"
VPSlash ; -- verb phrase missing complement e.g. "give to John"
--2 Adjectival phrases
-- Constructed in [Adjective Adjective.html].
AP ; -- adjectival phrase e.g. "very warm"
--2 Nouns and noun phrases
-- Constructed in [Noun Noun.html].
-- Many atomic noun phrases e.g. "everybody"
-- are constructed in [Structural Structural.html].
-- The determiner structure is
-- ``` Predet (QuantSg | QuantPl Num) Ord
-- as defined in [Noun Noun.html].
CN ; -- common noun (without determiner) e.g. "red house"
NP ; -- noun phrase (subject or object) e.g. "the red house"
Pron ; -- personal pronoun e.g. "she"
Det ; -- determiner phrase e.g. "those seven"
Predet ; -- predeterminer (prefixed Quant) e.g. "all"
Quant ; -- quantifier ('nucleus' of Det) e.g. "this/these"
Art ; -- article e.g. "the"
Num ; -- number determining element e.g. "seven"
Card ; -- cardinal number e.g. "seven"
Ord ; -- ordinal number (used in Det) e.g. "seventh"
--2 Numerals
-- Constructed in [Numeral Numeral.html].
Numeral ; -- cardinal or ordinal in words e.g. "five/fifth"
Digits ; -- cardinal or ordinal in digits e.g. "1,000/1,000th"
--2 Structural words
-- Constructed in [Structural Structural.html].
Conj ; -- conjunction e.g. "and"
---b DConj ; -- distributed conjunction e.g. "both - and"
Subj ; -- subjunction e.g. "if"
Prep ; -- preposition, or just case e.g. "in"
--2 Words of open classes
-- These are constructed in [Lexicon Lexicon.html] and in
-- additional lexicon modules.
V ; -- one-place verb e.g. "sleep"
V2 ; -- two-place verb e.g. "love"
V3 ; -- three-place verb e.g. "show"
VV ; -- verb-phrase-complement verb e.g. "want"
VS ; -- sentence-complement verb e.g. "claim"
VQ ; -- question-complement verb e.g. "wonder"
VA ; -- adjective-complement verb e.g. "look"
V2V ; -- verb with NP and V complement e.g. "cause"
V2S ; -- verb with NP and S complement e.g. "tell"
V2Q ; -- verb with NP and Q complement e.g. "ask"
V2A ; -- verb with NP and AP complement e.g. "paint"
A ; -- one-place adjective e.g. "warm"
A2 ; -- two-place adjective e.g. "divisible"
N ; -- common noun e.g. "house"
N2 ; -- relational noun e.g. "son"
N3 ; -- three-place relational noun e.g. "connection"
PN ; -- proper name e.g. "Paris"
-- DEPRECATED: QuantSg, QuantPl
--- QuantSg ;-- quantifier ('nucleus' of sing. Det) e.g. "every"
--- QuantPl ;-- quantifier ('nucleus' of plur. Det) e.g. "many"
}

View File

@@ -1,61 +0,0 @@
--1 Common: Structures with Common Implementations.
-- This module defines the categories that uniformly have the linearization
-- ${s : Str}$ in all languages.
-- Moreover, this module defines the abstract parameters of tense, polarity, and
-- anteriority, which are used in [``Phrase`` Phrase.html] to generate different
-- forms of sentences. Together they give 4 x 2 x 2 = 16 sentence forms.
-- These tenses are defined for all languages in the library. More tenses
-- can be defined in the language extensions, e.g. the "passé simple" of
-- Romance languages in [``ExtraRomance`` ../romance/ExtraRomance.gf].
abstract Common = {
cat
--2 Top-level units
-- Constructed in [``Text`` Text.html]: $Text$.
Text ; -- text consisting of several phrases e.g. "He is here. Why?"
-- Constructed in [``Phrase`` Phrase.html]:
Phr ; -- phrase in a text e.g. "but be quiet please"
Utt ; -- sentence, question, word... e.g. "be quiet"
Voc ; -- vocative or "please" e.g. "my darling"
PConj ; -- phrase-beginning conjunction e.g. "therefore"
-- Constructed in [``Sentence`` Sentence.html]:
SC ; -- embedded sentence or question e.g. "that it rains"
--2 Adverbs
-- Constructed in [``Adverb`` Adverb.html].
-- Many adverbs are constructed in [``Structural`` Structural.html].
Adv ; -- verb-phrase-modifying adverb e.g. "in the house"
AdV ; -- adverb directly attached to verb e.g. "always"
AdA ; -- adjective-modifying adverb e.g. "very"
AdN ; -- numeral-modifying adverb e.g. "more than"
IAdv ; -- interrogative adverb e.g. "why"
CAdv ; -- comparative adverb e.g. "more"
--2 Tense, polarity, and anteriority
Tense ; -- tense e.g. present, past, future
Pol ; -- polarity e.g. positive, negative
Ant ; -- anteriority e.g. simultaneous, anterior
fun
PPos, PNeg : Pol ; -- I sleep/don't sleep
TPres : Tense ;
ASimul : Ant ;
-- NOTE(review): the "--# notpresent" marks below presumably let the
-- mkPresent preprocessor strip these rules in present-only builds —
-- confirm against the build scripts.
TPast, TFut, TCond : Tense ; -- I slept/will sleep/would sleep --# notpresent
AAnter : Ant ; -- I have slept --# notpresent
}

View File

@@ -1,9 +0,0 @@
-- Deprecated functions kept so that old grammars still compile.
abstract Compatibility = Cat ** {
-- from Noun 19/4/2008
fun
NumInt : Int -> Num ; -- 57
OrdInt : Int -> Ord ; -- 57
}

View File

@@ -1,52 +0,0 @@
--1 Conjunction: Coordination
-- Coordination is defined for many different categories; here is
-- a sample. The rules apply to *lists* of two or more elements,
-- and define two general patterns:
-- - ordinary conjunction: X,...X and X
-- - distributed conjunction: both X,...,X and X
--
--
-- $VP$ conjunctions are not covered here, because their applicability
-- depends on language. Some special cases are defined in
-- [``Extra`` ../abstract/Extra.gf].
abstract Conjunction = Cat ** {
--2 Rules
fun
ConjS : Conj -> [S] -> S ; -- "he walks and she runs"
ConjAP : Conj -> [AP] -> AP ; -- "cold and warm"
ConjNP : Conj -> [NP] -> NP ; -- "she or we"
ConjAdv : Conj -> [Adv] -> Adv ; -- "here or there"
---b DConjS : DConj -> [S] -> S ; -- "either he walks or she runs"
---b DConjAP : DConj -> [AP] -> AP ; -- "both warm and cold"
---b DConjNP : DConj -> [NP] -> NP ; -- "either he or she"
---b DConjAdv : DConj -> [Adv] -> Adv; -- "both here and there"
--2 Categories
-- These categories are only used in this module.
-- The notation $[C]{2}$ declares a list category of $C$ with a
-- minimum of 2 elements.
cat
[S]{2} ;
[Adv]{2} ;
[NP]{2} ;
[AP]{2} ;
--2 List constructors
-- The list constructors are derived from the list notation and therefore
-- not given explicitly. But here are their type signatures:
-- BaseC : C -> C -> [C] ; -- for C = S, AP, NP, Adv
-- ConsC : C -> [C] -> [C] ;
}
--.
-- *Note*. This module uses right-recursive lists. If backward
-- compatibility with API 0.9 is needed, use
-- [SeqConjunction SeqConjunction.html].

View File

@@ -1,30 +0,0 @@
--1 More syntax rules
-- This module defines syntax rules that are not implemented in all
-- languages, but in more than one, so that it makes sense to offer a
-- common API.
abstract Extra = Cat ** {
fun
GenNP : NP -> Quant ; -- this man's
ComplBareVS : VS -> S -> VP ; -- know you go
StrandRelSlash : RP -> ClSlash -> RCl ; -- that he lives in
-- NOTE(review): $EmptyRelSlash$ takes an $RP$ argument even though no
-- relative pronoun appears in "he lives in" - confirm how it is used.
EmptyRelSlash : RP -> ClSlash -> RCl ; -- he lives in
StrandQuestSlash : IP -> ClSlash -> QCl ; -- whom does John live with
-- $VP$ conjunction, which has different fragments implemented in
-- different languages - never a full $VP$, though.
cat
VPI ; -- conjoinable verb-phrase fragment
[VPI] {2} ; -- list of at least two $VPI$
fun
MkVPI : VP -> VPI ; -- make a verb phrase conjoinable
ConjVPI : Conj -> [VPI] -> VPI ; -- conjoin a list of fragments
ComplVPIVV : VV -> VPI -> VP ; -- use the result as complement of e.g. "want"
}

View File

@@ -1,22 +0,0 @@
--1 Grammar: the Main Module of the Resource Grammar
-- This grammar is a collection of the different grammar modules.
-- To test the resource, import [``Lang`` Lang.html], which also contains
-- a lexicon.
abstract Grammar =
Noun,
Verb,
Adjective,
Adverb,
Numeral,
Sentence,
Question,
Relative,
Conjunction,
Phrase,
Text,
Structural,
Idiom ;

View File

@@ -1,22 +0,0 @@
--1 Idiom: Idiomatic Expressions
abstract Idiom = Cat ** {
-- This module defines constructions that are formed in fixed ways,
-- often different even in closely related languages.
fun
ImpersCl : VP -> Cl ; -- it is hot
GenericCl : VP -> Cl ; -- one sleeps
CleftNP : NP -> RS -> Cl ; -- it is I who did it
CleftAdv : Adv -> S -> Cl ; -- it is here she slept
ExistNP : NP -> Cl ; -- there is a house
ExistIP : IP -> QCl ; -- which houses are there
ProgrVP : VP -> VP ; -- be sleeping
ImpPl1 : VP -> Utt ; -- let's go (first-person plural imperative)
}

View File

@@ -1,13 +0,0 @@
--1 Lang: a Test Module for the Resource Grammar
-- This grammar is for testing the resource as included in the
-- language-independent API, consisting of a grammar and a lexicon.
-- The grammar without a lexicon is [``Grammar`` Grammar.html],
-- which may be more suitable to open in applications.
abstract Lang =
Grammar,
Lexicon
** {
flags startcat=Phr ; -- parsing and generation start from phrases by default
} ;

View File

@@ -1,356 +0,0 @@
abstract Lexicon = Cat ** {
-- A small general-purpose test lexicon, used together with
-- [``Grammar`` Grammar.html] in [``Lang`` Lang.html].
-- Identifier suffixes give the lexical category (_N, _A, _V, _V2, ...);
-- the digit "8" encodes a space in multiword entries
-- (e.g. $switch8off_V2$ for "switch off") - cf. the same convention
-- in [``Structural`` Structural.html].
--
-- Fix: the duplicate declarations of drink_V2, eat_V2, hear_V2,
-- know_V2, and see_V2 have been removed (each constant may be
-- declared only once), as well as a redundant repeated "fun" keyword.
fun
add_V3 : V3 ;
airplane_N : N ;
already_Adv : Adv ;
animal_N : N ;
answer_V2S : V2S ;
apartment_N : N ;
apple_N : N ;
art_N : N ;
ashes_N : N ;
ask_V2Q : V2Q ;
baby_N : N ;
back_N : N ;
bad_A : A ;
bank_N : N ;
bark_N : N ;
beautiful_A : A ;
become_VA : VA ;
beer_N : N ;
beg_V2V : V2V ;
belly_N : N ;
big_A : A ;
bike_N : N ;
bird_N : N ;
bite_V2 : V2 ;
black_A : A ;
blood_N : N ;
blow_V : V ;
blue_A : A ;
boat_N : N ;
bone_N : N ;
book_N : N ;
boot_N : N ;
boss_N : N ;
boy_N : N ;
bread_N : N ;
break_V2 : V2 ;
breast_N : N ;
breathe_V : V ;
broad_A : A ;
brother_N2 : N2 ;
brown_A : A ;
burn_V : V ;
butter_N : N ;
buy_V2 : V2 ;
camera_N : N ;
cap_N : N ;
car_N : N ;
carpet_N : N ;
cat_N : N ;
ceiling_N : N ;
chair_N : N ;
cheese_N : N ;
child_N : N ;
church_N : N ;
city_N : N ;
clean_A : A ;
clever_A : A ;
close_V2 : V2 ;
cloud_N : N ;
coat_N : N ;
cold_A : A ;
come_V : V ;
computer_N : N ;
correct_A : A ;
country_N : N ;
count_V2 : V2 ;
cousin_N : N ;
cow_N : N ;
cut_V2 : V2 ;
day_N : N ;
die_V : V ;
dig_V : V ;
dirty_A : A ;
distance_N3 : N3 ;
doctor_N : N ;
dog_N : N ;
door_N : N ;
do_V2 : V2 ;
drink_V2 : V2 ;
dry_A : A ;
dull_A : A ;
dust_N : N ;
ear_N : N ;
earth_N : N ;
easy_A2V : A2 ;
eat_V2 : V2 ;
egg_N : N ;
empty_A : A ;
enemy_N : N ;
eye_N : N ;
factory_N : N ;
fall_V : V ;
far_Adv : Adv ;
father_N2 : N2 ;
fat_N : N ;
fear_VS : VS ;
fear_V2 : V2 ;
feather_N : N ;
fight_V2 : V2 ;
find_V2 : V2 ;
fingernail_N : N ;
fire_N : N ;
fish_N : N ;
float_V : V ;
floor_N : N ;
flower_N : N ;
flow_V : V ;
fly_V : V ;
fog_N : N ;
foot_N : N ;
forest_N : N ;
forget_V2 : V2 ;
freeze_V : V ;
fridge_N : N ;
friend_N : N ;
fruit_N : N ;
full_A : A ;
fun_AV : A ;
garden_N : N ;
girl_N : N ;
give_V3 : V3 ;
glove_N : N ;
gold_N : N ;
good_A : A ;
go_V : V ;
grammar_N : N ;
grass_N : N ;
green_A : A ;
guts_N : N ;
hair_N : N ;
hand_N : N ;
harbour_N : N ;
hate_V2 : V2 ;
hat_N : N ;
have_V2 : V2 ;
head_N : N ;
heart_N : N ;
hear_V2 : V2 ;
heavy_A : A ;
hill_N : N ;
hit_V2 : V2 ;
hold_V2 : V2 ;
hope_VS : VS ;
horn_N : N ;
horse_N : N ;
hot_A : A ;
house_N : N ;
hunt_V2 : V2 ;
husband_N : N ;
ice_N : N ;
important_A : A ;
industry_N : N ;
iron_N : N ;
john_PN : PN ;
jump_V : V ;
kill_V2 : V2 ;
king_N : N ;
knee_N : N ;
know_V2 : V2 ;
lake_N : N ;
lamp_N : N ;
language_N : N ;
laugh_V : V ;
leaf_N : N ;
learn_V2 : V2 ;
leather_N : N ;
leave_V2 : V2 ;
left_Ord : Ord ;
leg_N : N ;
lie_V : V ;
like_V2 : V2 ;
listen_V2 : V2 ;
liver_N : N ;
live_V : V ;
long_A : A ;
lose_V2 : V2 ;
louse_N : N ;
love_N : N ;
love_V2 : V2 ;
man_N : N ;
married_A2 : A2 ;
meat_N : N ;
milk_N : N ;
moon_N : N ;
mother_N2 : N2 ;
mountain_N : N ;
mouth_N : N ;
music_N : N ;
name_N : N ;
narrow_A : A ;
near_A : A ;
neck_N : N ;
new_A : A ;
newspaper_N : N ;
night_N : N ;
nose_N : N ;
now_Adv : Adv ;
number_N : N ;
oil_N : N ;
old_A : A ;
open_V2 : V2 ;
paint_V2A : V2A ;
paper_N : N ;
paris_PN : PN ;
peace_N : N ;
pen_N : N ;
person_N : N ;
planet_N : N ;
plastic_N : N ;
play_V2 : V2 ;
play_V : V ;
policeman_N : N ;
priest_N : N ;
probable_AS : A ;
pull_V2 : V2 ;
push_V2 : V2 ;
put_V2 : V2 ;
queen_N : N ;
question_N : N ;
radio_N : N ;
rain_N : N ;
rain_V0 : V ;
read_V2 : V2 ;
ready_A : A ;
reason_N : N ;
red_A : A ;
religion_N : N ;
restaurant_N : N ;
right_Ord : Ord ;
river_N : N ;
road_N : N ;
rock_N : N ;
roof_N : N ;
root_N : N ;
rope_N : N ;
rotten_A : A ;
round_A : A ;
rubber_N : N ;
rub_V2 : V2 ;
rule_N : N ;
run_V : V ;
salt_N : N ;
sand_N : N ;
say_VS : VS ;
school_N : N ;
science_N : N ;
scratch_V2 : V2 ;
sea_N : N ;
seed_N : N ;
seek_V2 : V2 ;
see_V2 : V2 ;
sell_V3 : V3 ;
send_V3 : V3 ;
sew_V : V ;
sharp_A : A ;
sheep_N : N ;
ship_N : N ;
shirt_N : N ;
shoe_N : N ;
shop_N : N ;
short_A : A ;
silver_N : N ;
sing_V : V ;
sister_N : N ;
sit_V : V ;
skin_N : N ;
sky_N : N ;
sleep_V : V ;
small_A : A ;
smell_V : V ;
smoke_N : N ;
smooth_A : A ;
snake_N : N ;
snow_N : N ;
sock_N : N ;
song_N : N ;
speak_V2 : V2 ;
spit_V : V ;
split_V2 : V2 ;
squeeze_V2 : V2 ;
stab_V2 : V2 ;
stand_V : V ;
star_N : N ;
steel_N : N ;
stick_N : N ;
stone_N : N ;
stop_V : V ;
stove_N : N ;
straight_A : A ;
student_N : N ;
stupid_A : A ;
suck_V2 : V2 ;
sun_N : N ;
swell_V : V ;
swim_V : V ;
switch8off_V2 : V2 ;
switch8on_V2 : V2 ;
table_N : N ;
tail_N : N ;
talk_V3 : V3 ;
teacher_N : N ;
teach_V2 : V2 ;
television_N : N ;
thick_A : A ;
thin_A : A ;
think_V : V ;
throw_V2 : V2 ;
tie_V2 : V2 ;
today_Adv : Adv ;
tongue_N : N ;
tooth_N : N ;
train_N : N ;
travel_V : V ;
tree_N : N ;
turn_V : V ;
ugly_A : A ;
uncertain_A : A ;
understand_V2 : V2 ;
university_N : N ;
village_N : N ;
vomit_V : V ;
wait_V2 : V2 ;
walk_V : V ;
warm_A : A ;
war_N : N ;
wash_V2 : V2 ;
watch_V2 : V2 ;
water_N : N ;
wet_A : A ;
white_A : A ;
wide_A : A ;
wife_N : N ;
wind_N : N ;
window_N : N ;
wine_N : N ;
wing_N : N ;
win_V2 : V2 ;
wipe_V2 : V2 ;
woman_N : N ;
wonder_VQ : VQ ;
wood_N : N ;
worm_N : N ;
write_V2 : V2 ;
year_N : N ;
yellow_A : A ;
young_A : A ;
}

View File

@@ -1,144 +0,0 @@
--1 Noun: Nouns, noun phrases, and determiners
abstract Noun = Cat ** {
--2 Noun phrases
-- The three main types of noun phrases are
-- - common nouns with determiners
-- - proper names
-- - pronouns
--
--
fun
DetCN : Det -> CN -> NP ; -- the man
UsePN : PN -> NP ; -- John
UsePron : Pron -> NP ; -- he
-- Pronouns are defined in the module [``Structural`` Structural.html].
-- A noun phrase already formed can be modified by a $Predet$erminer.
PredetNP : Predet -> NP -> NP; -- only the man
-- A noun phrase can also be postmodified by the past participle of a
-- verb, by an adverb, or by a relative clause
PPartNP : NP -> V2 -> NP ; -- the man seen
AdvNP : NP -> Adv -> NP ; -- Paris today
RelNP : NP -> RS -> NP ; -- Paris, which is here
-- Determiners can form noun phrases directly.
DetNP : Det -> NP ; -- these five
--2 Determiners
-- The determiner has a fine-grained structure, in which a 'nucleus'
-- quantifier and two optional parts can be discerned: a cardinal and
-- an ordinal numeral.
DetQuantOrd : Quant -> Num -> Ord -> Det ; -- these five best
DetQuant : Quant -> Num -> Det ; -- these five
-- Whether the resulting determiner is singular or plural depends on the
-- cardinal.
-- All parts of the determiner can be empty, except $Quant$, which is
-- the "kernel" of a determiner. It is, however, the $Num$ that determines
-- the inherent number.
NumSg : Num ; -- singular
NumPl : Num ; -- plural
NumCard : Card -> Num ; -- five
-- $Card$ consists of either digits or numeral words.
NumDigits : Digits -> Card ; -- 51
NumNumeral : Numeral -> Card ; -- fifty-one
-- The construction of numerals is defined in [``Numeral`` Numeral.html].
-- A $Card$ can be modified by certain adverbs.
AdNum : AdN -> Card -> Card ; -- almost 51
-- An $Ord$ consists of either digits or numeral words.
-- Also superlative forms of adjectives behave syntactically like ordinals.
OrdDigits : Digits -> Ord ; -- 51st
OrdNumeral : Numeral -> Ord ; -- fifty-first
OrdSuperl : A -> Ord ; -- warmest
-- Definite and indefinite noun phrases are sometimes realized as
-- neatly distinct words (Spanish "un, unos ; el, los") but also without
-- any particular word (Finnish; Swedish definites).
DetArtOrd : Art -> Num -> Ord -> Det ; -- the (five) best
DetArtCard : Art -> Card -> Det ; -- the five
IndefArt : Art ; -- a
DefArt : Art ; -- the
-- Articles cannot alone form noun phrases, but need a noun.
DetArtSg : Art -> CN -> NP ; -- the man
DetArtPl : Art -> CN -> NP ; -- the men
-- Nouns can be used without an article as mass nouns. The resource does
-- not distinguish mass nouns from other common nouns, which can result
-- in semantically odd expressions.
MassNP : CN -> NP ; -- (beer)
-- Pronouns have possessive forms. Genitives of other kinds
-- of noun phrases are not given here, since they are not possible
-- in e.g. Romance languages. They can be found in $Extra$ modules.
PossPron : Pron -> Quant ; -- my (house)
-- Other determiners are defined in [``Structural`` Structural.html].
--2 Common nouns
-- Simple nouns can be used as nouns outright.
UseN : N -> CN ; -- house
-- Relational nouns take one or two arguments.
ComplN2 : N2 -> NP -> CN ; -- mother of the king
ComplN3 : N3 -> NP -> N2 ; -- distance from this city (to Paris)
-- Relational nouns can also be used without their arguments.
-- The semantics is typically derivative of the relational meaning.
UseN2 : N2 -> CN ; -- mother
Use2N3 : N3 -> N2 ; -- distance (from this city)
Use3N3 : N3 -> N2 ; -- distance (to Paris)
-- Nouns can be modified by adjectives, relative clauses, and adverbs
-- (the last rule will give rise to many 'PP attachment' ambiguities
-- when used in connection with verb phrases).
AdjCN : AP -> CN -> CN ; -- big house
RelCN : CN -> RS -> CN ; -- house that John bought
AdvCN : CN -> Adv -> CN ; -- house on the hill
-- Nouns can also be modified by embedded sentences and questions.
-- For some nouns this makes little sense, but we leave this for applications
-- to decide. Sentential complements are defined in [``Verb`` Verb.html].
SentCN : CN -> SC -> CN ; -- question where she sleeps
--2 Apposition
-- This is certainly overgenerating.
ApposCN : CN -> NP -> CN ; -- city Paris (, numbers x and y)
} ;

View File

@@ -1,60 +0,0 @@
--1 Numerals
-- This grammar defines numerals from 1 to 999999.
-- The implementations are adapted from the
-- [numerals library http://www.cs.chalmers.se/~aarne/GF/examples/numerals/]
-- which defines numerals for 88 languages.
-- The resource grammar implementations add to this inflection (if needed)
-- and ordinal numbers.
--
-- *Note* 1. Number 1 as defined
-- in the category $Numeral$ here should not be used in the formation of
-- noun phrases, and should therefore be removed. Instead, one should use
-- [``Structural`` Structural.html]$.one_Quant$. This makes the grammar simpler
-- because we can assume that numbers form plural noun phrases.
-- NOTE(review): $one_Quant$ is marked DEPRECATED in Structural - confirm
-- the intended replacement before following this advice.
--
-- *Note* 2. The implementations introduce spaces between
-- parts of a numeral, which is often incorrect - more work on
-- (un)lexing is needed to solve this problem.
abstract Numeral = Cat ** {
cat
Digit ; -- 2..9
Sub10 ; -- 1..9
Sub100 ; -- 1..99
Sub1000 ; -- 1..999
Sub1000000 ; -- 1..999999
fun
num : Sub1000000 -> Numeral ; -- cast to the top-level $Numeral$ category
n2, n3, n4, n5, n6, n7, n8, n9 : Digit ; -- 1 is special: see $pot01$ below
pot01 : Sub10 ; -- 1
pot0 : Digit -> Sub10 ; -- d * 1
pot110 : Sub100 ; -- 10
pot111 : Sub100 ; -- 11
pot1to19 : Digit -> Sub100 ; -- 10 + d
pot0as1 : Sub10 -> Sub100 ; -- coercion of 1..9
pot1 : Digit -> Sub100 ; -- d * 10
pot1plus : Digit -> Sub10 -> Sub100 ; -- d * 10 + n
pot1as2 : Sub100 -> Sub1000 ; -- coercion of 1..99
pot2 : Sub10 -> Sub1000 ; -- m * 100
pot2plus : Sub10 -> Sub100 -> Sub1000 ; -- m * 100 + n
pot2as3 : Sub1000 -> Sub1000000 ; -- coercion of 1..999
pot3 : Sub1000 -> Sub1000000 ; -- m * 1000
pot3plus : Sub1000 -> Sub1000 -> Sub1000000 ; -- m * 1000 + n
-- Numerals as sequences of digits have a separate, simpler grammar
cat
Dig ; -- single digit 0..9
fun
IDig : Dig -> Digits ; -- 8
IIDig : Dig -> Digits -> Digits ; -- 876
D_0, D_1, D_2, D_3, D_4, D_5, D_6, D_7, D_8, D_9 : Dig ; -- the ten digits
}

View File

@@ -1,44 +0,0 @@
--1 Phrase: Phrases and Utterances
abstract Phrase = Cat ** {
-- When a phrase is built from an utterance it can be prefixed
-- with a phrasal conjunction (such as "but", "therefore")
-- and suffixed with a vocative (typically a noun phrase).
fun
PhrUtt : PConj -> Utt -> Voc -> Phr ; -- but come here, my friend
-- Utterances are formed from sentences, questions, and imperatives.
UttS : S -> Utt ; -- John walks
UttQS : QS -> Utt ; -- is it good
UttImpSg : Pol -> Imp -> Utt; -- (don't) love yourself
UttImpPl : Pol -> Imp -> Utt; -- (don't) love yourselves
UttImpPol : Pol -> Imp -> Utt ; -- (don't) sleep (polite)
-- There are also 'one-word utterances'. A typical use of them is
-- as answers to questions.
-- *Note*. This list is incomplete. More categories could be covered.
-- Moreover, in many languages e.g. noun phrases in different cases
-- can be used.
UttIP : IP -> Utt ; -- who
UttIAdv : IAdv -> Utt ; -- why
UttNP : NP -> Utt ; -- this man
UttAdv : Adv -> Utt ; -- here
UttVP : VP -> Utt ; -- to sleep
-- The phrasal conjunction is optional. A sentence conjunction
-- can also be used to prefix an utterance.
NoPConj : PConj ; -- empty phrasal conjunction
PConjConj : Conj -> PConj ; -- and
-- The vocative is optional. Any noun phrase can be made into vocative,
-- which may be overgenerating (e.g. "I").
NoVoc : Voc ; -- empty vocative
VocNP : NP -> Voc ; -- my friend
}

View File

@@ -1,41 +0,0 @@
--1 Question: Questions and Interrogative Pronouns
abstract Question = Cat ** {
-- A question can be formed from a clause ('yes-no question') or
-- with an interrogative.
fun
QuestCl : Cl -> QCl ; -- does John walk
QuestVP : IP -> VP -> QCl ; -- who walks
QuestSlash : IP -> ClSlash -> QCl ; -- whom does John love
QuestIAdv : IAdv -> Cl -> QCl ; -- why does John walk
QuestIComp : IComp -> NP -> QCl ; -- where is John
-- Interrogative pronouns can be formed with interrogative
-- determiners, with or without a noun.
-- NOTE(review): $IdetCN$ and $IdetIP$ spell "det" with a lowercase 'd',
-- unlike other identifiers - presumably historical; confirm before renaming.
IdetCN : IDet -> CN -> IP ; -- which five songs
IdetIP : IDet -> IP ; -- which five
-- They can be modified with adverbs.
AdvIP : IP -> Adv -> IP ; -- who in Paris
-- Interrogative quantifiers have number forms and can take number modifiers.
IdetQuant : IQuant -> Num -> IDet ; -- which (five)
-- Interrogative adverbs can be formed prepositionally.
PrepIP : Prep -> IP -> IAdv ; -- with whom
-- Interrogative complements to copulas can be both adverbs and
-- pronouns.
CompIAdv : IAdv -> IComp ; -- where (is it)
CompIP : IP -> IComp ; -- who (is it)
-- More $IP$, $IDet$, and $IAdv$ are defined in [``Structural`` Structural.html].
}

View File

@@ -1,26 +0,0 @@
--1 Relative clauses and pronouns
abstract Relative = Cat ** {
fun
-- The simplest way to form a relative clause is from a clause by
-- a pronoun similar to "such that".
RelCl : Cl -> RCl ; -- such that John loves her
-- The more proper ways are from a verb phrase
-- (formed in [``Verb`` Verb.html]) or a sentence
-- with a missing noun phrase (formed in [``Sentence`` Sentence.html]).
RelVP : RP -> VP -> RCl ; -- who loves John
RelSlash : RP -> ClSlash -> RCl ; -- whom John loves
-- Complex relative pronouns are formed from the 'identity element' $IdRP$
-- by applying a preposition and a noun phrase; whether these end up
-- before or after the pronoun depends on the language.
IdRP : RP ; -- which
FunRP : Prep -> NP -> RP -> RP ; -- the mother of whom
}

View File

@@ -1,95 +0,0 @@
--1 Sentence: Sentences, Clauses, and Imperatives
abstract Sentence = Cat ** {
--2 Clauses
-- The $NP VP$ predication rule forms a clause whose linearization
-- gives a table of all tense variants, positive and negative.
-- Clauses are converted to $S$ (with fixed tense) with the
-- $UseCl$ function below.
fun
PredVP : NP -> VP -> Cl ; -- John walks
-- Using an embedded sentence as a subject is treated separately.
-- This can be overgenerating. E.g. "whether you go" as subject
-- is only meaningful for some verb phrases.
PredSCVP : SC -> VP -> Cl ; -- that she goes is good
--2 Clauses missing object noun phrases
-- This category is a variant of the 'slash category' $S/NP$ of
-- GPSG and categorial grammars, which in turn replaces
-- movement transformations in the formation of questions
-- and relative clauses. Except $SlashV2$, the construction
-- rules can be seen as special cases of function composition, in
-- the style of CCG.
-- *Note* the set is not complete and lacks e.g. verbs with more than 2 places.
SlashVP : NP -> VPSlash -> ClSlash ; -- (whom) he sees
AdvSlash : ClSlash -> Adv -> ClSlash ; -- (whom) he sees today
SlashPrep : Cl -> Prep -> ClSlash ; -- (with whom) he walks
SlashVS : NP -> VS -> SSlash -> ClSlash ; -- (whom) she says that he loves
--2 Imperatives
-- An imperative is straightforwardly formed from a verb phrase.
-- It has variation over positive and negative, singular and plural.
-- To fix these parameters, see [``Phrase`` Phrase.html].
ImpVP : VP -> Imp ; -- love yourselves
--2 Embedded sentences
-- Sentences, questions, and infinitival phrases can be used as
-- subjects and (adverbial) complements.
EmbedS : S -> SC ; -- that she goes
EmbedQS : QS -> SC ; -- who goes
EmbedVP : VP -> SC ; -- to go
--2 Sentences
-- These are the 4 x 2 x 2 = 16 forms generated by different
-- combinations of tense (4), anteriority (2), and
-- polarity (2), which are defined in [``Common`` Common.html].
fun
UseCl : Tense -> Ant -> Pol -> Cl -> S ; -- he has slept
UseQCl : Tense -> Ant -> Pol -> QCl -> QS ; -- has he slept
UseRCl : Tense -> Ant -> Pol -> RCl -> RS ; -- who has slept
UseSlash : Tense -> Ant -> Pol -> ClSlash -> SSlash ; -- (whom) he has seen
-- An adverb can be added to the beginning of a sentence.
AdvS : Adv -> S -> S ; -- today, I will go home
-- A sentence can be modified by a relative clause referring to its contents.
RelS : S -> RS -> S ; -- she sleeps, which is good
}
--.
-- Examples for English $S$/$Cl$:
{-
Pres Simul Pos ODir : he sleeps
Pres Simul Neg ODir : he doesn't sleep
Pres Anter Pos ODir : he has slept
Pres Anter Neg ODir : he hasn't slept
Past Simul Pos ODir : he slept
Past Simul Neg ODir : he didn't sleep
Past Anter Pos ODir : he had slept
Past Anter Neg ODir : he hadn't slept
Fut Simul Pos ODir : he will sleep
Fut Simul Neg ODir : he won't sleep
Fut Anter Pos ODir : he will have slept
Fut Anter Neg ODir : he won't have slept
Cond Simul Pos ODir : he would sleep
Cond Simul Neg ODir : he wouldn't sleep
Cond Anter Pos ODir : he would have slept
Cond Anter Neg ODir : he wouldn't have slept
-}

View File

@@ -1,111 +0,0 @@
--1 Structural: Structural Words
--
-- Here we have some words belonging to closed classes and appearing
-- in all languages we have considered.
-- Sometimes more distinctions are needed, e.g. $we_Pron$ in Spanish
-- should be replaced by masculine and feminine variants, found in
-- [``ExtraSpa`` ../spanish/ExtraSpa.gf].
--
-- Digits in identifiers encode non-identifier characters: "7" joins the
-- parts of a discontinuous or multi-word item (e.g. $both7and_DConj$ for
-- "both...and", $here7to_Adv$ for "to here") and "8" attaches a sense tag
-- (e.g. $by8agent_Prep$ vs. $by8means_Prep$ for the two senses of "by").
abstract Structural = Cat ** {
fun
-- This is an alphabetical list of structural words
above_Prep : Prep ;
after_Prep : Prep ;
all_Predet : Predet ;
almost_AdA : AdA ;
almost_AdN : AdN ;
although_Subj : Subj ;
always_AdV : AdV ;
and_Conj : Conj ;
because_Subj : Subj ;
before_Prep : Prep ;
behind_Prep : Prep ;
between_Prep : Prep ;
both7and_DConj : Conj ; -- both...and
---b both7and_DConj : DConj ;
but_PConj : PConj ;
by8agent_Prep : Prep ; -- by (agent)
by8means_Prep : Prep ; -- by (means of)
can8know_VV : VV ; -- can (capacity)
can_VV : VV ; -- can (possibility)
during_Prep : Prep ;
either7or_DConj : Conj ; -- either...or
---b either7or_DConj : DConj ;
every_Det : Det ;
everybody_NP : NP ;
everything_NP : NP ;
everywhere_Adv : Adv ;
--- first_Ord : Ord ; DEPRECATED
few_Det : Det ;
for_Prep : Prep ;
from_Prep : Prep ;
he_Pron : Pron ;
here_Adv : Adv ;
here7to_Adv : Adv ; -- to here
here7from_Adv : Adv ; -- from here
how_IAdv : IAdv ;
how8many_IDet : IDet ; -- how many
i_Pron : Pron ;
if_Subj : Subj ;
in8front_Prep : Prep ; -- in front of
in_Prep : Prep ;
it_Pron : Pron ;
less_CAdv : CAdv ;
many_Det : Det ;
more_CAdv : CAdv ;
most_Predet : Predet ;
much_Det : Det ;
must_VV : VV ;
---b no_Phr : Phr ;
no_Utt : Utt ;
on_Prep : Prep ;
--- one_Quant : QuantSg ; DEPRECATED
only_Predet : Predet ;
or_Conj : Conj ;
otherwise_PConj : PConj ;
part_Prep : Prep ; -- of (partitive; cf. $possess_Prep$)
please_Voc : Voc ;
possess_Prep : Prep ; -- of (possessive)
quite_Adv : AdA ; -- NOTE(review): typed $AdA$ despite the $_Adv$ suffix; presumably kept for backward compatibility - confirm before renaming
she_Pron : Pron ;
so_AdA : AdA ;
someSg_Det : Det ;
somePl_Det : Det ;
somebody_NP : NP ;
something_NP : NP ;
somewhere_Adv : Adv ;
that_Quant : Quant ;
there_Adv : Adv ;
there7to_Adv : Adv ; -- to there
there7from_Adv : Adv ; -- from there
therefore_PConj : PConj ;
they_Pron : Pron ;
this_Quant : Quant ;
through_Prep : Prep ;
to_Prep : Prep ;
too_AdA : AdA ;
under_Prep : Prep ;
very_AdA : AdA ;
want_VV : VV ;
we_Pron : Pron ;
whatPl_IP : IP ; -- what (plural)
whatSg_IP : IP ; -- what (singular)
when_IAdv : IAdv ;
when_Subj : Subj ;
where_IAdv : IAdv ;
which_IQuant : IQuant ;
whoPl_IP : IP ; -- who (plural)
whoSg_IP : IP ; -- who (singular)
why_IAdv : IAdv ;
with_Prep : Prep ;
without_Prep : Prep ;
---b yes_Phr : Phr ;
yes_Utt : Utt ;
youSg_Pron : Pron ; -- you (singular)
youPl_Pron : Pron ; -- you (plural)
youPol_Pron : Pron ; -- you (polite)
}

View File

@@ -1,15 +0,0 @@
--1 Text: Texts
-- Texts are built from an empty text by adding $Phr$ases,
-- using as constructors the punctuation marks ".", "?", and "!".
-- Any punctuation mark can be attached to any kind of phrase.
abstract Text = Common ** {
fun
TEmpty : Text ; -- the empty text
TFullStop : Phr -> Text -> Text ; -- John walks. ...
TQuestMark : Phr -> Text -> Text ; -- Are they here? ...
TExclMark : Phr -> Text -> Text ; -- Let's go! ...
}

View File

@@ -1,71 +0,0 @@
--1 The construction of verb phrases
abstract Verb = Cat ** {
--2 Complementization rules
-- Verb phrases are constructed from verbs by providing their
-- complements. There is one rule for each verb category.
fun
UseV : V -> VP ; -- sleep
ComplVV : VV -> VP -> VP ; -- want to run
ComplVS : VS -> S -> VP ; -- say that she runs
ComplVQ : VQ -> QS -> VP ; -- wonder who runs
ComplVA : VA -> AP -> VP ; -- become red
SlashV2a : V2 -> VPSlash ; -- love (it)
Slash2V3 : V3 -> NP -> VPSlash ; -- give it (to her)
Slash3V3 : V3 -> NP -> VPSlash ; -- give (it) to her
SlashV2V : V2V -> VP -> VPSlash ; -- beg (her) to go
SlashV2S : V2S -> S -> VPSlash ; -- answer (to him) that it is good
SlashV2Q : V2Q -> QS -> VPSlash ; -- ask (him) who came
SlashV2A : V2A -> AP -> VPSlash ; -- paint (it) red
ComplSlash : VPSlash -> NP -> VP ; -- love it
SlashVV : VV -> VPSlash -> VPSlash ; -- want to buy
SlashV2VNP : V2V -> NP -> VPSlash -> VPSlash ; -- beg me to buy
--2 Other ways of forming verb phrases
-- Verb phrases can also be constructed reflexively and from
-- copula-preceded complements.
ReflVP : VPSlash -> VP ; -- love himself
UseComp : Comp -> VP ; -- be warm
-- Passivization of two-place verbs is another way to use
-- them. In many languages, the result is a participle that
-- is used as complement to a copula ("is used"), but other
-- auxiliary verbs are possible (Ger. "wird angewendet", It.
-- "viene usato"), as well as special verb forms (Fin. "käytetään",
-- Swe. "används").
--
-- *Note*. the rule can be overgenerating, since the $V2$ need not
-- take a direct object.
PassV2 : V2 -> VP ; -- be loved
-- Adverbs can be added to verb phrases. Many languages make
-- a distinction between adverbs that are attached in the end
-- vs. next to (or before) the verb.
AdvVP : VP -> Adv -> VP ; -- sleep here
AdVVP : AdV -> VP -> VP ; -- always sleep
-- *Agents of passives* are constructed as adverbs with the
-- preposition [``Structural`` Structural.html]$.by8agent_Prep$.
--2 Complements to copula
-- Adjectival phrases, noun phrases, and adverbs can be used.
CompAP : AP -> Comp ; -- (be) small
CompNP : NP -> Comp ; -- (be) a man
CompAdv : Adv -> Comp ; -- (be) here
}

Some files were not shown because too many files have changed in this diff Show More