removed GF/deprecated
260
deprecated/FILES
@@ -1,260 +0,0 @@
|
|||||||
|
|
||||||
Code map for GF source files.
|
|
||||||
|
|
||||||
$Author: peb $
|
|
||||||
$Date: 2005/02/07 10:58:08 $
|
|
||||||
|
|
||||||
Directories:
|
|
||||||
|
|
||||||
[top level] GF main function and runtime-related modules
|
|
||||||
api high-level access to GF functionalities
|
|
||||||
canonical GFC (= GF Canonical) basic functionalities
|
|
||||||
cf context-free skeleton used in parsing
|
|
||||||
cfgm multilingual context-free skeleton exported to Java
|
|
||||||
compile compilation phases from GF to GFC
|
|
||||||
conversions [OBSOLETE] formats used in parser generation
|
|
||||||
for-ghc GHC-specific files (Glasgow Haskell Compiler)
|
|
||||||
for-hugs Hugs-specific files (a Haskell interpreter)
|
|
||||||
for-windows Windows-specific files (an operating system from Microsoft)
|
|
||||||
grammar basic functionalities of GF grammars used in compilation
|
|
||||||
infra GF-independent infrastructure and auxiliaries
|
|
||||||
newparsing parsing with GF grammars: current version (cf. parsing)
|
|
||||||
notrace debugging utilities for parser development (cf. trace)
|
|
||||||
parsers parsers of GF and GFC files
|
|
||||||
parsing [OBSOLETE] parsing with GF grammars: old version (cf. newparsing)
|
|
||||||
shell interaction shells
|
|
||||||
source utilities for reading in GF source files
|
|
||||||
speech generation of speech recognition grammars
|
|
||||||
trace debugging utilities for parser development (cf. notrace)
|
|
||||||
useGrammar grammar functionalities for applications
|
|
||||||
util utilities for using GF
|
|
||||||
|
|
||||||
|
|
||||||
Individual files:
|
|
||||||
|
|
||||||
GF.hs the Main module
|
|
||||||
GFModes.hs
|
|
||||||
HelpFile.hs [AUTO] help file generated by util/MkHelpFile
|
|
||||||
Today.hs [AUTO] file generated by "make today"
|
|
||||||
|
|
||||||
api/API.hs high-level access to GF functionalities
|
|
||||||
api/BatchTranslate.hs
|
|
||||||
api/GetMyTree.hs
|
|
||||||
api/GrammarToHaskell.hs
|
|
||||||
api/IOGrammar.hs
|
|
||||||
api/MyParser.hs slot for defining your own parser
|
|
||||||
|
|
||||||
canonical/AbsGFC.hs [AUTO] abstract syntax of GFC
|
|
||||||
canonical/CanonToGrammar.hs
|
|
||||||
canonical/CMacros.hs
|
|
||||||
canonical/ErrM.hs
|
|
||||||
canonical/GetGFC.hs
|
|
||||||
canonical/GFC.cf [LBNF] source of GFC parser
|
|
||||||
canonical/GFC.hs
|
|
||||||
canonical/LexGFC.hs
|
|
||||||
canonical/Look.hs
|
|
||||||
canonical/MkGFC.hs
|
|
||||||
canonical/PrExp.hs
|
|
||||||
canonical/PrintGFC.hs pretty-printer of GFC
|
|
||||||
canonical/Share.hs
|
|
||||||
canonical/SkelGFC.hs [AUTO]
|
|
||||||
canonical/TestGFC.hs [AUTO]
|
|
||||||
canonical/Unlex.hs
|
|
||||||
|
|
||||||
cf/CanonToCF.hs
|
|
||||||
cf/CF.hs abstract syntax of context-free grammars
|
|
||||||
cf/CFIdent.hs
|
|
||||||
cf/CFtoGrammar.hs
|
|
||||||
cf/CFtoSRG.hs
|
|
||||||
cf/ChartParser.hs the current default parsing method
|
|
||||||
cf/EBNF.hs
|
|
||||||
cf/PPrCF.hs
|
|
||||||
cf/PrLBNF.hs
|
|
||||||
cf/Profile.hs
|
|
||||||
|
|
||||||
cfgm/CFG.cf [LBNF] source
|
|
||||||
cfgm/AbsCFG.hs [AUTO]
|
|
||||||
cfgm/LexCFG.hs [AUTO]
|
|
||||||
cfgm/ParCFG.hs [AUTO]
|
|
||||||
cfgm/PrintCFG.hs [AUTO]
|
|
||||||
cfgm/PrintCFGrammar.hs
|
|
||||||
|
|
||||||
compile/CheckGrammar.hs
|
|
||||||
compile/Compile.hs the complete compiler pipeline
|
|
||||||
compile/Extend.hs
|
|
||||||
compile/GetGrammar.hs
|
|
||||||
compile/GrammarToCanon.hs
|
|
||||||
compile/MkResource.hs
|
|
||||||
compile/MkUnion.hs
|
|
||||||
compile/ModDeps.hs
|
|
||||||
compile/Optimize.hs
|
|
||||||
compile/PGrammar.hs
|
|
||||||
compile/PrOld.hs
|
|
||||||
compile/Rebuild.hs
|
|
||||||
compile/RemoveLiT.hs
|
|
||||||
compile/Rename.hs
|
|
||||||
compile/ShellState.hs the run-time multilingual grammar datastructure
|
|
||||||
compile/Update.hs
|
|
||||||
|
|
||||||
for-ghc/ArchEdit.hs
|
|
||||||
for-ghc/Arch.hs
|
|
||||||
|
|
||||||
for-ghc-nofud/ArchEdit.hs@
|
|
||||||
for-ghc-nofud/Arch.hs@
|
|
||||||
|
|
||||||
for-hugs/ArchEdit.hs
|
|
||||||
for-hugs/Arch.hs
|
|
||||||
for-hugs/JGF.hs
|
|
||||||
for-hugs/MoreCustom.hs
|
|
||||||
for-hugs/Unicode.hs
|
|
||||||
|
|
||||||
for-hugs/Arch.hs
|
|
||||||
for-hugs/ArchEdit.hs
|
|
||||||
for-hugs/JGF.hs
|
|
||||||
for-hugs/LexCFG.hs dummy CFG lexer
|
|
||||||
for-hugs/LexGF.hs dummy GF lexer
|
|
||||||
for-hugs/LexGFC.hs dummy GFC lexer
|
|
||||||
for-hugs/MoreCustom.hs
|
|
||||||
for-hugs/ParCFG.hs dummy CFG parser
|
|
||||||
for-hugs/ParGFC.hs dummy GFC parser
|
|
||||||
for-hugs/ParGF.hs dummy GF parser
|
|
||||||
for-hugs/Tracing.hs
|
|
||||||
for-hugs/Unicode.hs
|
|
||||||
|
|
||||||
for-windows/ArchEdit.hs
|
|
||||||
for-windows/Arch.hs
|
|
||||||
|
|
||||||
grammar/AbsCompute.hs
|
|
||||||
grammar/Abstract.hs GF and GFC abstract syntax datatypes
|
|
||||||
grammar/AppPredefined.hs
|
|
||||||
grammar/Compute.hs
|
|
||||||
grammar/Grammar.hs GF source grammar datatypes
|
|
||||||
grammar/LookAbs.hs
|
|
||||||
grammar/Lookup.hs
|
|
||||||
grammar/Macros.hs macros for creating GF terms and types
|
|
||||||
grammar/MMacros.hs more macros, mainly for abstract syntax
|
|
||||||
grammar/PatternMatch.hs
|
|
||||||
grammar/PrGrammar.hs the top-level grammar printer
|
|
||||||
grammar/Refresh.hs
|
|
||||||
grammar/ReservedWords.hs
|
|
||||||
grammar/TC.hs Coquand's type checking engine
|
|
||||||
grammar/TypeCheck.hs
|
|
||||||
grammar/Unify.hs
|
|
||||||
grammar/Values.hs
|
|
||||||
|
|
||||||
infra/Arabic.hs ASCII coding of Arabic Unicode
|
|
||||||
infra/Assoc.hs finite maps/association lists as binary search trees
|
|
||||||
infra/CheckM.hs
|
|
||||||
infra/Comments.hs
|
|
||||||
infra/Devanagari.hs ASCII coding of Devanagari Unicode
|
|
||||||
infra/ErrM.hs
|
|
||||||
infra/Ethiopic.hs
|
|
||||||
infra/EventF.hs
|
|
||||||
infra/ExtendedArabic.hs
|
|
||||||
infra/ExtraDiacritics.hs
|
|
||||||
infra/FudgetOps.hs
|
|
||||||
infra/Glue.hs
|
|
||||||
infra/Greek.hs
|
|
||||||
infra/Hebrew.hs
|
|
||||||
infra/Hiragana.hs
|
|
||||||
infra/Ident.hs
|
|
||||||
infra/LatinASupplement.hs
|
|
||||||
infra/Map.hs finite maps as red black trees
|
|
||||||
infra/Modules.hs
|
|
||||||
infra/OCSCyrillic.hs
|
|
||||||
infra/Operations.hs library of strings, search trees, error monads
|
|
||||||
infra/Option.hs
|
|
||||||
infra/OrdMap2.hs abstract class of finite maps + implementation as association lists
|
|
||||||
infra/OrdSet.hs abstract class of sets + implementation as sorted lists
|
|
||||||
infra/Parsers.hs
|
|
||||||
infra/ReadFiles.hs
|
|
||||||
infra/RedBlack.hs red black trees
|
|
||||||
infra/RedBlackSet.hs sets and maps as red black trees
|
|
||||||
infra/Russian.hs
|
|
||||||
infra/SortedList.hs sets as sorted lists
|
|
||||||
infra/Str.hs
|
|
||||||
infra/Tamil.hs
|
|
||||||
infra/Text.hs
|
|
||||||
infra/Trie2.hs
|
|
||||||
infra/Trie.hs
|
|
||||||
infra/UnicodeF.hs
|
|
||||||
infra/Unicode.hs
|
|
||||||
infra/UseIO.hs
|
|
||||||
infra/UTF8.hs UTF8 en/decoding
|
|
||||||
infra/Zipper.hs
|
|
||||||
|
|
||||||
newparsing/CFGrammar.hs type definitions for context-free grammars
|
|
||||||
newparsing/CFParserGeneral.hs several variants of general CFG chart parsing
|
|
||||||
newparsing/CFParserIncremental.hs several variants of incremental (Earley-style) CFG chart parsing
|
|
||||||
newparsing/ConvertGFCtoMCFG.hs converting GFC to MCFG
|
|
||||||
newparsing/ConvertGrammar.hs conversions between different grammar formats
|
|
||||||
newparsing/ConvertMCFGtoCFG.hs converting MCFG to CFG
|
|
||||||
newparsing/GeneralChart.hs Haskell framework for "parsing as deduction"
|
|
||||||
newparsing/GrammarTypes.hs instantiations of grammar types
|
|
||||||
newparsing/IncrementalChart.hs Haskell framework for incremental chart parsing
|
|
||||||
newparsing/MCFGrammar.hs type definitions for multiple CFG
|
|
||||||
newparsing/MCFParserBasic.hs MCFG chart parser
|
|
||||||
newparsing/MCFRange.hs ranges for MCFG parsing
|
|
||||||
newparsing/ParseCFG.hs parsing of CFG
|
|
||||||
newparsing/ParseCF.hs parsing of the CF format
|
|
||||||
newparsing/ParseGFC.hs parsing of GFC
|
|
||||||
newparsing/ParseMCFG.hs parsing of MCFG
|
|
||||||
newparsing/Parser.hs general definitions for parsers
|
|
||||||
newparsing/PrintParser.hs pretty-printing class for parsers
|
|
||||||
newparsing/PrintSimplifiedTerm.hs simplified pretty-printing for GFC terms
|
|
||||||
|
|
||||||
notrace/Tracing.hs tracing predicates when we DON'T want tracing capabilities (normal case)
|
|
||||||
|
|
||||||
parsers/ParGFC.hs [AUTO]
|
|
||||||
parsers/ParGF.hs [AUTO]
|
|
||||||
|
|
||||||
shell/CommandF.hs
|
|
||||||
shell/CommandL.hs line-based syntax of editor commands
|
|
||||||
shell/Commands.hs commands of GF editor shell
|
|
||||||
shell/IDE.hs
|
|
||||||
shell/JGF.hs
|
|
||||||
shell/PShell.hs
|
|
||||||
shell/ShellCommands.hs commands of GF main shell
|
|
||||||
shell/Shell.hs
|
|
||||||
shell/SubShell.hs
|
|
||||||
shell/TeachYourself.hs
|
|
||||||
|
|
||||||
source/AbsGF.hs [AUTO]
|
|
||||||
source/ErrM.hs
|
|
||||||
source/GF.cf [LBNF] source of GF parser
|
|
||||||
source/GrammarToSource.hs
|
|
||||||
source/LexGF.hs [AUTO]
|
|
||||||
source/PrintGF.hs [AUTO]
|
|
||||||
source/SourceToGrammar.hs
|
|
||||||
|
|
||||||
speech/PrGSL.hs
|
|
||||||
speech/PrJSGF.hs
|
|
||||||
speech/SRG.hs
|
|
||||||
speech/TransformCFG.hs
|
|
||||||
|
|
||||||
trace/Tracing.hs tracing predicates when we want tracing capabilities
|
|
||||||
|
|
||||||
translate/GFT.hs Main module of html-producing batch translator
|
|
||||||
|
|
||||||
useGrammar/Custom.hs database for customizable commands
|
|
||||||
useGrammar/Editing.hs
|
|
||||||
useGrammar/Generate.hs
|
|
||||||
useGrammar/GetTree.hs
|
|
||||||
useGrammar/Information.hs
|
|
||||||
useGrammar/Linear.hs the linearization algorithm
|
|
||||||
useGrammar/MoreCustom.hs
|
|
||||||
useGrammar/Morphology.hs
|
|
||||||
useGrammar/Paraphrases.hs
|
|
||||||
useGrammar/Parsing.hs the top-level parsing algorithm
|
|
||||||
useGrammar/Randomized.hs
|
|
||||||
useGrammar/RealMoreCustom.hs
|
|
||||||
useGrammar/Session.hs
|
|
||||||
useGrammar/TeachYourself.hs
|
|
||||||
useGrammar/Tokenize.hs lexer definitions (listed in Custom)
|
|
||||||
useGrammar/Transfer.hs
|
|
||||||
|
|
||||||
util/GFDoc.hs utility for producing LaTeX and HTML from GF
|
|
||||||
util/HelpFile source of ../HelpFile.hs
|
|
||||||
util/Htmls.hs utility for chopping a HTML document to slides
|
|
||||||
util/MkHelpFile.hs
|
|
||||||
util/WriteF.hs
|
|
||||||
@@ -1,693 +0,0 @@
|
|||||||
-- GF help file updated for GF 2.6, 17/6/2006.
|
|
||||||
-- *: Commands and options marked with * are currently not implemented.
|
|
||||||
--
|
|
||||||
-- Each command has a long and a short name, options, and zero or more
|
|
||||||
-- arguments. Commands are sorted by functionality. The short name is
|
|
||||||
-- given first.
|
|
||||||
|
|
||||||
-- Type "h -all" for full help file, "h <CommandName>" for full help on a command.
|
|
||||||
|
|
||||||
-- commands that change the state
|
|
||||||
|
|
||||||
i, import: i File
|
|
||||||
Reads a grammar from File and compiles it into a GF runtime grammar.
|
|
||||||
Files "include"d in File are read recursively, nubbing repetitions.
|
|
||||||
If a grammar with the same language name is already in the state,
|
|
||||||
it is overwritten - but only if compilation succeeds.
|
|
||||||
The grammar parser depends on the file name suffix:
|
|
||||||
.gf normal GF source
|
|
||||||
.gfc canonical GF
|
|
||||||
.gfr precompiled GF resource
|
|
||||||
.gfcm multilingual canonical GF
|
|
||||||
.gfe example-based grammar files (only with the -ex option)
|
|
||||||
.gfwl multilingual word list (preprocessed to abs + cncs)
|
|
||||||
.ebnf Extended BNF format
|
|
||||||
.cf Context-free (BNF) format
|
|
||||||
.trc TransferCore format
|
|
||||||
options:
|
|
||||||
-old old: parse in GF<2.0 format (not necessary)
|
|
||||||
-v verbose: give lots of messages
|
|
||||||
-s silent: don't give error messages
|
|
||||||
-src from source: ignore precompiled gfc and gfr files
|
|
||||||
-gfc from gfc: use compiled modules whenever they exist
|
|
||||||
-retain retain operations: read resource modules (needed in comm cc)
|
|
||||||
-nocf don't build old-style context-free grammar (default without HOAS)
|
|
||||||
-docf do build old-style context-free grammar (default with HOAS)
|
|
||||||
-nocheckcirc don't eliminate circular rules from CF
|
|
||||||
-cflexer build an optimized parser with separate lexer trie
|
|
||||||
-noemit do not emit code (default with old grammar format)
|
|
||||||
-o do emit code (default with new grammar format)
|
|
||||||
-ex preprocess .gfe files if needed
|
|
||||||
-prob read probabilities from top grammar file (format --# prob Fun Double)
|
|
||||||
-treebank read a treebank file to memory (xml format)
|
|
||||||
flags:
|
|
||||||
-abs set the name used for abstract syntax (with -old option)
|
|
||||||
-cnc set the name used for concrete syntax (with -old option)
|
|
||||||
-res set the name used for resource (with -old option)
|
|
||||||
-path use the (colon-separated) search path to find modules
|
|
||||||
-optimize select an optimization to override file-defined flags
|
|
||||||
-conversion select parsing method (values strict|nondet)
|
|
||||||
-probs read probabilities from file (format (--# prob) Fun Double)
|
|
||||||
-preproc use a preprocessor on each source file
|
|
||||||
-noparse read nonparsable functions from file (format --# noparse Funs)
|
|
||||||
examples:
|
|
||||||
i English.gf -- ordinary import of Concrete
|
|
||||||
i -retain german/ParadigmsGer.gf -- import of Resource to test
|
|
||||||
|
|
||||||
r, reload: r
|
|
||||||
Executes the previous import (i) command.
|
|
||||||
|
|
||||||
rl, remove_language: rl Language
|
|
||||||
Takes away the language from the state.
|
|
||||||
|
|
||||||
e, empty: e
|
|
||||||
Takes away all languages and resets all global flags.
|
|
||||||
|
|
||||||
sf, set_flags: sf Flag*
|
|
||||||
The values of the Flags are set for Language. If no language
|
|
||||||
is specified, the flags are set globally.
|
|
||||||
examples:
|
|
||||||
sf -nocpu -- stop showing CPU time
|
|
||||||
sf -lang=Swe -- make Swe the default concrete
|
|
||||||
|
|
||||||
s, strip: s
|
|
||||||
Prune the state by removing source and resource modules.
|
|
||||||
|
|
||||||
dc, define_command: dc Name Anything
|
|
||||||
Add a new defined command. The Name must start with '%'. Later,
|
|
||||||
if 'Name X' is used, it is replaced by Anything where #1 is replaced
|
|
||||||
by X.
|
|
||||||
Restrictions: Currently at most one argument is possible, and a defined
|
|
||||||
command cannot appear in a pipe.
|
|
||||||
To see what definitions are in scope, use help -defs.
|
|
||||||
examples:
|
|
||||||
dc %tnp p -cat=NP -lang=Eng #1 | l -lang=Swe -- translate NPs
|
|
||||||
%tnp "this man" -- translate and parse
|
|
||||||
|
|
||||||
dt, define_term: dt Name Tree
|
|
||||||
Add a constant for a tree. The constant can later be called by
|
|
||||||
prefixing it with '$'.
|
|
||||||
Restriction: These terms are not yet usable as a subterm.
|
|
||||||
To see what definitions are in scope, use help -defs.
|
|
||||||
examples:
|
|
||||||
p -cat=NP "this man" | dt tm -- define tm as parse result
|
|
||||||
l -all $tm -- linearize tm in all forms
|
|
||||||
|
|
||||||
-- commands that give information about the state
|
|
||||||
|
|
||||||
pg, print_grammar: pg
|
|
||||||
Prints the actual grammar (overridden by the -lang=X flag).
|
|
||||||
The -printer=X flag sets the format in which the grammar is
|
|
||||||
written.
|
|
||||||
N.B. since grammars are compiled when imported, this command
|
|
||||||
generally does not show the grammar in the same format as the
|
|
||||||
source. In particular, the -printer=latex is not supported.
|
|
||||||
Use the command tg -printer=latex File to print the source
|
|
||||||
grammar in LaTeX.
|
|
||||||
options:
|
|
||||||
-utf8 apply UTF8-encoding to the grammar
|
|
||||||
flags:
|
|
||||||
-printer
|
|
||||||
-lang
|
|
||||||
-startcat -- The start category of the generated grammar.
|
|
||||||
Only supported by some grammar printers.
|
|
||||||
examples:
|
|
||||||
pg -printer=cf -- show the context-free skeleton
|
|
||||||
|
|
||||||
pm, print_multigrammar: pm
|
|
||||||
Prints the current multilingual grammar in .gfcm form.
|
|
||||||
(Automatically executes the strip command (s) before doing this.)
|
|
||||||
options:
|
|
||||||
-utf8 apply UTF8 encoding to the tokens in the grammar
|
|
||||||
-utf8id apply UTF8 encoding to the identifiers in the grammar
|
|
||||||
examples:
|
|
||||||
pm | wf Letter.gfcm -- print the grammar into the file Letter.gfcm
|
|
||||||
pm -printer=graph | wf D.dot -- then do 'dot -Tps D.dot > D.ps'
|
|
||||||
|
|
||||||
vg, visualize_graph: vg
|
|
||||||
Show the dependency graph of multilingual grammar via dot and gv.
|
|
||||||
|
|
||||||
po, print_options: po
|
|
||||||
Print what modules there are in the state. Also
|
|
||||||
prints those flag values in the current state that differ from defaults.
|
|
||||||
|
|
||||||
pl, print_languages: pl
|
|
||||||
Prints the names of currently available languages.
|
|
||||||
|
|
||||||
pi, print_info: pi Ident
|
|
||||||
Prints information on the identifier.
|
|
||||||
|
|
||||||
-- commands that execute and show the session history
|
|
||||||
|
|
||||||
eh, execute_history: eh File
|
|
||||||
Executes commands in the file.
|
|
||||||
|
|
||||||
ph, print_history: ph
|
|
||||||
Prints the commands issued during the GF session.
|
|
||||||
The result is readable by the eh command.
|
|
||||||
examples:
|
|
||||||
ph | wf foo.hist -- save the history into a file
|
|
||||||
|
|
||||||
-- linearization, parsing, translation, and computation
|
|
||||||
|
|
||||||
l, linearize: l PattList? Tree
|
|
||||||
Shows all linearization forms of Tree by the actual grammar
|
|
||||||
(which is overridden by the -lang flag).
|
|
||||||
The pattern list has the form [P, ... ,Q] where P,...,Q follow GF
|
|
||||||
syntax for patterns. All those forms are generated that match with the
|
|
||||||
pattern list. Too short lists are filled with variables in the end.
|
|
||||||
Only the -table flag is available if a pattern list is specified.
|
|
||||||
HINT: see GF language specification for the syntax of Pattern and Term.
|
|
||||||
You can also copy and paste parsing results.
|
|
||||||
options:
|
|
||||||
-struct bracketed form
|
|
||||||
-table show parameters (not compatible with -record, -all)
|
|
||||||
-record record, i.e. explicit GF concrete syntax term (not compatible with -table, -all)
|
|
||||||
-all show all forms and variants (not compatible with -record, -table)
|
|
||||||
-multi linearize to all languages (can be combined with the other options)
|
|
||||||
flags:
|
|
||||||
-lang linearize in this grammar
|
|
||||||
-number give this number of forms at most
|
|
||||||
-unlexer filter output through unlexer
|
|
||||||
examples:
|
|
||||||
l -lang=Swe -table -- show full inflection table in Swe
|
|
||||||
|
|
||||||
p, parse: p String
|
|
||||||
Shows all Trees returned for String by the actual
|
|
||||||
grammar (overridden by the -lang flag), in the category S (overridden
|
|
||||||
by the -cat flag).
|
|
||||||
options for batch input:
|
|
||||||
-lines parse each line of input separately, ignoring empty lines
|
|
||||||
-all as -lines, but also parse empty lines
|
|
||||||
-prob rank results by probability
|
|
||||||
-cut stop after first lexing result leading to parser success
|
|
||||||
-fail show strings whose parse fails prefixed by #FAIL
|
|
||||||
-ambiguous show strings that have more than one parse prefixed by #AMBIGUOUS
|
|
||||||
options for selecting parsing method:
|
|
||||||
-fcfg parse using a fast variant of MCFG (default if no HOAS in grammar)
|
|
||||||
-old parse using an overgenerating CFG (default if HOAS in grammar)
|
|
||||||
-cfg parse using a much less overgenerating CFG
|
|
||||||
-mcfg parse using an even less overgenerating MCFG
|
|
||||||
Note: the first time parsing with -cfg, -mcfg, and -fcfg may take a long time
|
|
||||||
options that only work for the -old default parsing method:
|
|
||||||
-n non-strict: tolerates morphological errors
|
|
||||||
-ign ignore unknown words when parsing
|
|
||||||
-raw return context-free terms in raw form
|
|
||||||
-v verbose: give more information if parsing fails
|
|
||||||
flags:
|
|
||||||
-cat parse in this category
|
|
||||||
-lang parse in this grammar
|
|
||||||
-lexer filter input through this lexer
|
|
||||||
-parser use this parsing strategy
|
|
||||||
-number return this many results at most
|
|
||||||
examples:
|
|
||||||
p -cat=S -mcfg "jag är gammal" -- parse an S with the MCFG
|
|
||||||
rf examples.txt | p -lines -- parse each non-empty line of the file
|
|
||||||
|
|
||||||
at, apply_transfer: at (Module.Fun | Fun)
|
|
||||||
Transfer a term using Fun from Module, or the topmost transfer
|
|
||||||
module. Transfer modules are given in the .trc format. They are
|
|
||||||
shown by the 'po' command.
|
|
||||||
flags:
|
|
||||||
-lang typecheck the result in this lang instead of default lang
|
|
||||||
examples:
|
|
||||||
p -lang=Cncdecimal "123" | at num2bin | l -- convert dec to bin
|
|
||||||
|
|
||||||
tb, tree_bank: tb
|
|
||||||
Generate a multilingual treebank from a list of trees (default) or compare
|
|
||||||
to an existing treebank.
|
|
||||||
options:
|
|
||||||
-c compare to existing xml-formatted treebank
|
|
||||||
-trees return the trees of the treebank
|
|
||||||
-all show all linearization alternatives (branches and variants)
|
|
||||||
-table show tables of linearizations with parameters
|
|
||||||
-record show linearization records
|
|
||||||
-xml wrap the treebank (or comparison results) with XML tags
|
|
||||||
-mem write the treebank in memory instead of a file TODO
|
|
||||||
examples:
|
|
||||||
gr -cat=S -number=100 | tb -xml | wf tb.xml -- random treebank into file
|
|
||||||
rf tb.xml | tb -c -- compare-test treebank from file
|
|
||||||
rf old.xml | tb -trees | tb -xml -- create new treebank from old
|
|
||||||
|
|
||||||
ut, use_treebank: ut String
|
|
||||||
Lookup a string in a treebank and return the resulting trees.
|
|
||||||
Use 'tb' to create a treebank and 'i -treebank' to read one from
|
|
||||||
a file.
|
|
||||||
options:
|
|
||||||
-assocs show all string-trees associations in the treebank
|
|
||||||
-strings show all strings in the treebank
|
|
||||||
-trees show all trees in the treebank
|
|
||||||
-raw return the lookup result as string, without typechecking it
|
|
||||||
flags:
|
|
||||||
-treebank use this treebank (instead of the latest introduced one)
|
|
||||||
examples:
|
|
||||||
ut "He adds this to that" | l -multi -- use treebank lookup as parser in translation
|
|
||||||
ut -assocs | grep "ComplV2" -- show all associations with ComplV2
|
|
||||||
|
|
||||||
tt, test_tokenizer: tt String
|
|
||||||
Show the token list sent to the parser when String is parsed.
|
|
||||||
HINT: can be useful when debugging the parser.
|
|
||||||
flags:
|
|
||||||
-lexer use this lexer
|
|
||||||
examples:
|
|
||||||
tt -lexer=codelit "2*(x + 3)" -- a favourite lexer for program code
|
|
||||||
|
|
||||||
g, grep: g String1 String2
|
|
||||||
Grep the String1 in the String2. String2 is read line by line,
|
|
||||||
and only those lines that contain String1 are returned.
|
|
||||||
flags:
|
|
||||||
-v return those lines that do not contain String1.
|
|
||||||
examples:
|
|
||||||
pg -printer=cf | grep "mother" -- show cf rules with word mother
|
|
||||||
|
|
||||||
cc, compute_concrete: cc Term
|
|
||||||
Compute a term by concrete syntax definitions. Uses the topmost
|
|
||||||
resource module (the last in listing by command po) to resolve
|
|
||||||
constant names.
|
|
||||||
N.B. You need the flag -retain when importing the grammar, if you want
|
|
||||||
the oper definitions to be retained after compilation; otherwise this
|
|
||||||
command does not expand oper constants.
|
|
||||||
N.B.' The resulting Term is not a term in the sense of abstract syntax,
|
|
||||||
and hence not a valid input to a Tree-demanding command.
|
|
||||||
flags:
|
|
||||||
-table show output in a similar readable format as 'l -table'
|
|
||||||
-res use another module than the topmost one
|
|
||||||
examples:
|
|
||||||
cc -res=ParadigmsFin (nLukko "hyppy") -- inflect "hyppy" with nLukko
|
|
||||||
|
|
||||||
so, show_operations: so Type
|
|
||||||
Show oper operations with the given value type. Uses the topmost
|
|
||||||
resource module to resolve constant names.
|
|
||||||
N.B. You need the flag -retain when importing the grammar, if you want
|
|
||||||
the oper definitions to be retained after compilation; otherwise this
|
|
||||||
command does not find any oper constants.
|
|
||||||
N.B.' The value type may not be defined in a supermodule of the
|
|
||||||
topmost resource. In that case, use appropriate qualified name.
|
|
||||||
flags:
|
|
||||||
-res use another module than the topmost one
|
|
||||||
examples:
|
|
||||||
so -res=ParadigmsFin ResourceFin.N -- show N-paradigms in ParadigmsFin
|
|
||||||
|
|
||||||
t, translate: t Lang Lang String
|
|
||||||
Parses String in Lang1 and linearizes the resulting Trees in Lang2.
|
|
||||||
flags:
|
|
||||||
-cat
|
|
||||||
-lexer
|
|
||||||
-parser
|
|
||||||
examples:
|
|
||||||
t Eng Swe -cat=S "every number is even or odd"
|
|
||||||
|
|
||||||
gr, generate_random: gr Tree?
|
|
||||||
Generates a random Tree of a given category. If a Tree
|
|
||||||
argument is given, the command completes the Tree with values to
|
|
||||||
the metavariables in the tree.
|
|
||||||
options:
|
|
||||||
-prob use probabilities (works for nondep types only)
|
|
||||||
-cf use a very fast method (works for nondep types only)
|
|
||||||
flags:
|
|
||||||
-cat generate in this category
|
|
||||||
-lang use the abstract syntax of this grammar
|
|
||||||
-number generate this number of trees (not impl. with Tree argument)
|
|
||||||
-depth use this number of search steps at most
|
|
||||||
examples:
|
|
||||||
gr -cat=Query -- generate in category Query
|
|
||||||
gr (PredVP ? (NegVG ?)) -- generate a random tree of this form
|
|
||||||
gr -cat=S -tr | l -- generate and linearize
|
|
||||||
|
|
||||||
gt, generate_trees: gt Tree?
|
|
||||||
Generates all trees up to a given depth. If the depth is large,
|
|
||||||
a small -alts is recommended. If a Tree argument is given, the
|
|
||||||
command completes the Tree with values to the metavariables in
|
|
||||||
the tree.
|
|
||||||
options:
|
|
||||||
-metas also return trees that include metavariables
|
|
||||||
-all generate all (can be infinitely many, lazily)
|
|
||||||
-lin linearize result of -all (otherwise, use pipe to linearize)
|
|
||||||
flags:
|
|
||||||
-depth generate to this depth (default 3)
|
|
||||||
-atoms take this number of atomic rules of each category (default unlimited)
|
|
||||||
-alts take this number of alternatives at each branch (default unlimited)
|
|
||||||
-cat generate in this category
|
|
||||||
-nonub don't remove duplicates (faster, not effective with -mem)
|
|
||||||
-mem use a memorizing algorithm (often faster, usually more memory-consuming)
|
|
||||||
-lang use the abstract syntax of this grammar
|
|
||||||
-number generate (at most) this number of trees (also works with -all)
|
|
||||||
-noexpand don't expand these categories (comma-separated, e.g. -noexpand=V,CN)
|
|
||||||
-doexpand only expand these categories (comma-separated, e.g. -doexpand=V,CN)
|
|
||||||
examples:
|
|
||||||
gt -depth=10 -cat=NP -- generate all NP's to depth 10
|
|
||||||
gt (PredVP ? (NegVG ?)) -- generate all trees of this form
|
|
||||||
gt -cat=S -tr | l -- generate and linearize
|
|
||||||
gt -noexpand=NP | l -mark=metacat -- the only NP is meta, linearized "?0 +NP"
|
|
||||||
gt | l | p -lines -ambiguous | grep "#AMBIGUOUS" -- show ambiguous strings
|
|
||||||
|
|
||||||
ma, morphologically_analyse: ma String
|
|
||||||
Runs morphological analysis on each word in String and displays
|
|
||||||
the results line by line.
|
|
||||||
options:
|
|
||||||
-short show analyses in bracketed words, instead of separate lines
|
|
||||||
-status show just the word at success, prefixed with "*" at failure
|
|
||||||
flags:
|
|
||||||
-lang
|
|
||||||
examples:
|
|
||||||
rf Bible.txt | ma -short | wf Bible.tagged -- analyse the Bible
|
|
||||||
|
|
||||||
|
|
||||||
-- elementary generation of Strings and Trees
|
|
||||||
|
|
||||||
ps, put_string: ps String
|
|
||||||
Returns its argument String, like Unix echo.
|
|
||||||
HINT. The strength of ps comes from the possibility to receive the
|
|
||||||
argument from a pipeline, and altering it by the -filter flag.
|
|
||||||
flags:
|
|
||||||
-filter filter the result through this string processor
|
|
||||||
-length cut the string after this number of characters
|
|
||||||
examples:
|
|
||||||
gr -cat=Letter | l | ps -filter=text -- random letter as text
|
|
||||||
|
|
||||||
pt, put_tree: pt Tree
|
|
||||||
Returns its argument Tree, like a specialized Unix echo.
|
|
||||||
HINT. The strength of pt comes from the possibility to receive
|
|
||||||
the argument from a pipeline, and altering it by the -transform flag.
|
|
||||||
flags:
|
|
||||||
-transform transform the result by this term processor
|
|
||||||
-number generate this number of terms at most
|
|
||||||
examples:
|
|
||||||
p "zero is even" | pt -transform=solve -- solve ?'s in parse result
|
|
||||||
|
|
||||||
* st, show_tree: st Tree
|
|
||||||
Prints the tree as a string. Unlike pt, this command cannot be
|
|
||||||
used in a pipe to produce a tree, since its output is a string.
|
|
||||||
flags:
|
|
||||||
-printer show the tree in a special format (-printer=xml supported)
|
|
||||||
|
|
||||||
wt, wrap_tree: wt Fun
|
|
||||||
Wraps the tree as the sole argument of Fun.
|
|
||||||
flags:
|
|
||||||
-c compute the resulting new tree to normal form
|
|
||||||
|
|
||||||
vt, visualize_tree: vt Tree
|
|
||||||
Shows the abstract syntax tree via dot and gv (via temporary files
|
|
||||||
grphtmp.dot, grphtmp.ps).
|
|
||||||
flags:
|
|
||||||
-c show categories only (no functions)
|
|
||||||
-f show functions only (no categories)
|
|
||||||
-g show as graph (sharing uses of the same function)
|
|
||||||
-o just generate the .dot file
|
|
||||||
examples:
|
|
||||||
p "hello world" | vt -o | wf my.dot ;; ! open -a GraphViz my.dot
|
|
||||||
-- This writes the parse tree into my.dot and opens the .dot file
|
|
||||||
-- with another application without generating .ps.
|
|
||||||
|
|
||||||
-- subshells
|
|
||||||
|
|
||||||
es, editing_session: es
|
|
||||||
Opens an interactive editing session.
|
|
||||||
N.B. Exit from a Fudget session is to the Unix shell, not to GF.
|
|
||||||
options:
|
|
||||||
-f Fudget GUI (necessary for Unicode; only available in X Window System)
|
|
||||||
|
|
||||||
ts, translation_session: ts
|
|
||||||
Translates input lines from any of the actual languages to all other ones.
|
|
||||||
To exit, type a full stop (.) alone on a line.
|
|
||||||
N.B. Exit from a Fudget session is to the Unix shell, not to GF.
|
|
||||||
HINT: Set -parser and -lexer locally in each grammar.
|
|
||||||
options:
|
|
||||||
-f Fudget GUI (necessary for Unicode; only available in X Window System)
|
|
||||||
-lang prepend translation results with language names
|
|
||||||
flags:
|
|
||||||
-cat the parser category
|
|
||||||
examples:
|
|
||||||
ts -cat=Numeral -lang -- translate numerals, show language names
|
|
||||||
|
|
||||||
tq, translation_quiz: tq Lang Lang
|
|
||||||
Random-generates translation exercises from Lang1 to Lang2,
|
|
||||||
keeping score of success.
|
|
||||||
To interrupt, type a full stop (.) alone on a line.
|
|
||||||
HINT: Set -parser and -lexer locally in each grammar.
|
|
||||||
flags:
|
|
||||||
-cat
|
|
||||||
examples:
|
|
||||||
tq -cat=NP TestResourceEng TestResourceSwe -- quiz for NPs
|
|
||||||
|
|
||||||
tl, translation_list: tl Lang Lang
|
|
||||||
Random-generates a list of ten translation exercises from Lang1
|
|
||||||
to Lang2. The number can be changed by a flag.
|
|
||||||
HINT: use wf to save the exercises in a file.
|
|
||||||
flags:
|
|
||||||
-cat
|
|
||||||
-number
|
|
||||||
examples:
|
|
||||||
tl -cat=NP TestResourceEng TestResourceSwe -- quiz list for NPs
|
|
||||||
|
|
||||||
mq, morphology_quiz: mq
|
|
||||||
Random-generates morphological exercises,
|
|
||||||
keeping score of success.
|
|
||||||
To interrupt, type a full stop (.) alone on a line.
|
|
||||||
HINT: use printname judgements in your grammar to
|
|
||||||
produce nice expressions for desired forms.
|
|
||||||
flags:
|
|
||||||
-cat
|
|
||||||
-lang
|
|
||||||
examples:
|
|
||||||
mq -cat=N -lang=TestResourceSwe -- quiz for Swedish nouns
|
|
||||||
|
|
||||||
ml, morphology_list: ml
|
|
||||||
Random-generates a list of ten morphological exercises,
|
|
||||||
keeping score of success. The number can be changed with a flag.
|
|
||||||
HINT: use wf to save the exercises in a file.
|
|
||||||
flags:
|
|
||||||
-cat
|
|
||||||
-lang
|
|
||||||
-number
|
|
||||||
examples:
|
|
||||||
ml -cat=N -lang=TestResourceSwe -- quiz list for Swedish nouns
|
|
||||||
|
|
||||||
|
|
||||||
-- IO related commands
|
|
||||||
|
|
||||||
rf, read_file: rf File
|
|
||||||
Returns the contents of File as a String; error if File does not exist.
|
|
||||||
|
|
||||||
wf, write_file: wf File String
|
|
||||||
Writes String into File; File is created if it does not exist.
|
|
||||||
N.B. the command overwrites File without a warning.
|
|
||||||
|
|
||||||
af, append_file: af File String
|
|
||||||
Writes String into the end of File; File is created if it does not exist.
|
|
||||||
|
|
||||||
* tg, transform_grammar: tg File
|
|
||||||
Reads File, parses as a grammar,
|
|
||||||
but instead of compiling further, prints it.
|
|
||||||
The environment is not changed. When parsing the grammar, the same file
|
|
||||||
name suffixes are supported as in the i command.
|
|
||||||
HINT: use this command to print the grammar in
|
|
||||||
another format (the -printer flag); pipe it to wf to save this format.
|
|
||||||
flags:
|
|
||||||
-printer (only -printer=latex supported currently)
|
|
||||||
|
|
||||||
* cl, convert_latex: cl File
|
|
||||||
Reads File, which is expected to be in LaTeX form.
|
|
||||||
Three environments are treated in special ways:
|
|
||||||
\begGF - \end{verbatim}, which contains GF judgements,
|
|
||||||
\begTGF - \end{verbatim}, which contains a GF expression (displayed)
|
|
||||||
\begInTGF - \end{verbatim}, which contains a GF expression (inlined).
|
|
||||||
Moreover, certain macros should be included in the file; you can
|
|
||||||
get those macros by applying 'tg -printer=latex foo.gf' to any grammar
|
|
||||||
foo.gf. Notice that the same File can be imported as a GF grammar,
|
|
||||||
consisting of all the judgements in \begGF environments.
|
|
||||||
HINT: pipe with 'wf Foo.tex' to generate a new Latex file.
|
|
||||||
|
|
||||||
sa, speak_aloud: sa String
|
|
||||||
Uses the Flite speech generator to produce speech for String.
|
|
||||||
Works for American English spelling.
|
|
||||||
examples:
|
|
||||||
h | sa -- listen to the list of commands
|
|
||||||
gr -cat=S | l | sa -- generate a random sentence and speak it aloud
|
|
||||||
|
|
||||||
si, speech_input: si
|
|
||||||
Uses an ATK speech recognizer to get speech input.
|
|
||||||
flags:
|
|
||||||
-lang: The grammar to use with the speech recognizer.
|
|
||||||
-cat: The grammar category to get input in.
|
|
||||||
-language: Use acoustic model and dictionary for this language.
|
|
||||||
-number: The number of utterances to recognize.
|
|
||||||
|
|
||||||
h, help: h Command?
|
|
||||||
Displays the paragraph concerning the command from this help file.
|
|
||||||
Without the argument, shows the first lines of all paragraphs.
|
|
||||||
options:
|
|
||||||
-all show the whole help file
|
|
||||||
-defs show user-defined commands and terms
|
|
||||||
-FLAG show the values of FLAG (works for grammar-independent flags)
|
|
||||||
examples:
|
|
||||||
h print_grammar -- show all information on the pg command
|
|
||||||
|
|
||||||
q, quit: q
|
|
||||||
Exits GF.
|
|
||||||
HINT: you can use 'ph | wf history' to save your session.
|
|
||||||
|
|
||||||
!, system_command: ! String
|
|
||||||
Issues a system command. No value is returned to GF.
|
|
||||||
example:
|
|
||||||
! ls
|
|
||||||
|
|
||||||
?, system_command: ? String
|
|
||||||
Issues a system command that receives its arguments from GF pipe
|
|
||||||
and returns a value to GF.
|
|
||||||
example:
|
|
||||||
h | ? 'wc -l' | p -cat=Num
|
|
||||||
|
|
||||||
|
|
||||||
-- Flags. The availability of flags is defined separately for each command.
|
|
||||||
|
|
||||||
-cat, category in which parsing is performed.
|
|
||||||
The default is S.
|
|
||||||
|
|
||||||
-depth, the search depth in e.g. random generation.
|
|
||||||
The default depends on application.
|
|
||||||
|
|
||||||
-filter, operation performed on a string. The default is identity.
|
|
||||||
-filter=identity no change
|
|
||||||
-filter=erase erase the text
|
|
||||||
-filter=take100 show the first 100 characters
|
|
||||||
-filter=length show the length of the string
|
|
||||||
-filter=text format as text (punctuation, capitalization)
|
|
||||||
-filter=code format as code (spacing, indentation)
|
|
||||||
|
|
||||||
-lang, grammar used when executing a grammar-dependent command.
|
|
||||||
The default is the last-imported grammar.
|
|
||||||
|
|
||||||
-language, voice used by Festival as its --language flag in the sa command.
|
|
||||||
The default is system-dependent.
|
|
||||||
|
|
||||||
-length, the maximum number of characters shown of a string.
|
|
||||||
The default is unlimited.
|
|
||||||
|
|
||||||
-lexer, tokenization transforming a string into lexical units for a parser.
|
|
||||||
The default is words.
|
|
||||||
-lexer=words tokens are separated by spaces or newlines
|
|
||||||
-lexer=literals like words, but GF integer and string literals recognized
|
|
||||||
-lexer=vars like words, but "x","x_...","$...$" as vars, "?..." as meta
|
|
||||||
-lexer=chars each character is a token
|
|
||||||
-lexer=code use Haskell's lex
|
|
||||||
-lexer=codevars like code, but treat unknown words as variables, ?? as meta
|
|
||||||
-lexer=textvars like text, but treat unknown words as variables, ?? as meta
|
|
||||||
-lexer=text with conventions on punctuation and capital letters
|
|
||||||
-lexer=codelit like code, but treat unknown words as string literals
|
|
||||||
-lexer=textlit like text, but treat unknown words as string literals
|
|
||||||
-lexer=codeC use a C-like lexer
|
|
||||||
-lexer=ignore like literals, but ignore unknown words
|
|
||||||
-lexer=subseqs like ignore, but then try all subsequences from longest
|
|
||||||
|
|
||||||
-number, the maximum number of generated items in a list.
|
|
||||||
The default is unlimited.
|
|
||||||
|
|
||||||
-optimize, optimization on generated code.
|
|
||||||
The default is share for concrete, none for resource modules.
|
|
||||||
Each of the flags can have the suffix _subs, which performs
|
|
||||||
common subexpression elimination after the main optimization.
|
|
||||||
Thus, -optimize=all_subs is the most aggressive one. The _subs
|
|
||||||
strategy only works in GFC, and applies therefore in concrete but
|
|
||||||
not in resource modules.
|
|
||||||
-optimize=share share common branches in tables
|
|
||||||
-optimize=parametrize first try parametrize then do share with the rest
|
|
||||||
-optimize=values represent tables as courses-of-values
|
|
||||||
-optimize=all first try parametrize then do values with the rest
|
|
||||||
-optimize=none no optimization
|
|
||||||
|
|
||||||
-parser, parsing strategy. The default is chart. If -cfg or -mcfg is
|
|
||||||
selected, only bottomup and topdown are recognized.
|
|
||||||
-parser=chart bottom-up chart parsing
|
|
||||||
-parser=bottomup a more up to date bottom-up strategy
|
|
||||||
-parser=topdown top-down strategy
|
|
||||||
-parser=old an old bottom-up chart parser
|
|
||||||
|
|
||||||
-printer, format in which the grammar is printed. The default is
|
|
||||||
gfc. Those marked with M are (only) available for pm, the rest
|
|
||||||
for pg.
|
|
||||||
-printer=gfc GFC grammar
|
|
||||||
-printer=gf GF grammar
|
|
||||||
-printer=old old GF grammar
|
|
||||||
-printer=cf context-free grammar, with profiles
|
|
||||||
-printer=bnf context-free grammar, without profiles
|
|
||||||
-printer=lbnf labelled context-free grammar for BNF Converter
|
|
||||||
-printer=plbnf grammar for BNF Converter, with precedence levels
|
|
||||||
*-printer=happy source file for Happy parser generator (use lbnf!)
|
|
||||||
-printer=haskell abstract syntax in Haskell, with transl to/from GF
|
|
||||||
-printer=haskell_gadt abstract syntax GADT in Haskell, with transl to/from GF
|
|
||||||
-printer=morpho full-form lexicon, long format
|
|
||||||
*-printer=latex LaTeX file (for the tg command)
|
|
||||||
-printer=fullform full-form lexicon, short format
|
|
||||||
*-printer=xml XML: DTD for the pg command, object for st
|
|
||||||
-printer=old old GF: file readable by GF 1.2
|
|
||||||
-printer=stat show some statistics of generated GFC
|
|
||||||
-printer=probs show probabilities of all functions
|
|
||||||
-printer=gsl Nuance GSL speech recognition grammar
|
|
||||||
-printer=jsgf Java Speech Grammar Format
|
|
||||||
-printer=jsgf_sisr_old Java Speech Grammar Format with semantic tags in
|
|
||||||
SISR WD 20030401 format
|
|
||||||
-printer=srgs_abnf SRGS ABNF format
|
|
||||||
-printer=srgs_abnf_non_rec SRGS ABNF format, without any recursion.
|
|
||||||
-printer=srgs_abnf_sisr_old SRGS ABNF format, with semantic tags in
|
|
||||||
SISR WD 20030401 format
|
|
||||||
-printer=srgs_xml SRGS XML format
|
|
||||||
-printer=srgs_xml_non_rec SRGS XML format, without any recursion.
|
|
||||||
-printer=srgs_xml_prob SRGS XML format, with weights
|
|
||||||
-printer=srgs_xml_sisr_old SRGS XML format, with semantic tags in
|
|
||||||
SISR WD 20030401 format
|
|
||||||
-printer=vxml Generate a dialogue system in VoiceXML.
|
|
||||||
-printer=slf a finite automaton in the HTK SLF format
|
|
||||||
-printer=slf_graphviz the same automaton as slf, but in Graphviz format
|
|
||||||
-printer=slf_sub a finite automaton with sub-automata in the
|
|
||||||
HTK SLF format
|
|
||||||
-printer=slf_sub_graphviz the same automaton as slf_sub, but in
|
|
||||||
Graphviz format
|
|
||||||
-printer=fa_graphviz a finite automaton with labelled edges
|
|
||||||
-printer=regular a regular grammar in a simple BNF
|
|
||||||
-printer=unpar a gfc grammar with parameters eliminated
|
|
||||||
-printer=functiongraph abstract syntax functions in 'dot' format
|
|
||||||
-printer=typegraph abstract syntax categories in 'dot' format
|
|
||||||
-printer=transfer Transfer language datatype (.tr file format)
|
|
||||||
-printer=cfg-prolog M cfg in prolog format (also pg)
|
|
||||||
-printer=gfc-prolog M gfc in prolog format (also pg)
|
|
||||||
-printer=gfcm M gfcm file (default for pm)
|
|
||||||
-printer=graph M module dependency graph in 'dot' (graphviz) format
|
|
||||||
-printer=header M gfcm file with header (for GF embedded in Java)
|
|
||||||
-printer=js M JavaScript type annotator and linearizer
|
|
||||||
-printer=mcfg-prolog M mcfg in prolog format (also pg)
|
|
||||||
-printer=missing M the missing linearizations of each concrete
|
|
||||||
|
|
||||||
-startcat, like -cat, but used in grammars (to avoid clash with keyword cat)
|
|
||||||
|
|
||||||
-transform, transformation performed on a syntax tree. The default is identity.
|
|
||||||
-transform=identity no change
|
|
||||||
-transform=compute compute by using definitions in the grammar
|
|
||||||
-transform=nodup return the term only if it has no constants duplicated
|
|
||||||
-transform=nodupatom return the term only if it has no atomic constants duplicated
|
|
||||||
-transform=typecheck return the term only if it is type-correct
|
|
||||||
-transform=solve solve metavariables as derived refinements
|
|
||||||
-transform=context solve metavariables by unique refinements as variables
|
|
||||||
-transform=delete replace the term by metavariable
|
|
||||||
|
|
||||||
-unlexer, untokenization transforming linearization output into a string.
|
|
||||||
The default is unwords.
|
|
||||||
-unlexer=unwords space-separated token list (like unwords)
|
|
||||||
-unlexer=text format as text: punctuation, capitals, paragraph <p>
|
|
||||||
-unlexer=code format as code (spacing, indentation)
|
|
||||||
-unlexer=textlit like text, but remove string literal quotes
|
|
||||||
-unlexer=codelit like code, but remove string literal quotes
|
|
||||||
-unlexer=concat remove all spaces
|
|
||||||
-unlexer=bind like identity, but bind at "&+"
|
|
||||||
|
|
||||||
-mark, marking of parts of tree in linearization. The default is none.
|
|
||||||
-mark=metacat append "+CAT" to every metavariable, showing its category
|
|
||||||
-mark=struct show tree structure with brackets
|
|
||||||
-mark=java show tree structure with XML tags (used in gfeditor)
|
|
||||||
|
|
||||||
-coding, Some grammars are in UTF-8, some in isolatin-1.
|
|
||||||
If the letters ä (a-umlaut) and ö (o-umlaut) look strange, either
|
|
||||||
change your terminal to isolatin-1, or rewrite the grammar with
|
|
||||||
'pg -utf8'.
|
|
||||||
|
|
||||||
-- *: Commands and options marked with * are not currently implemented.
|
|
||||||
@@ -1,250 +0,0 @@
|
|||||||
include config.mk
|
|
||||||
|
|
||||||
|
|
||||||
GHMAKE=$(GHC) --make
|
|
||||||
GHCXMAKE=ghcxmake
|
|
||||||
GHCFLAGS+= -fglasgow-exts
|
|
||||||
GHCOPTFLAGS=-O2
|
|
||||||
GHCFUDFLAG=
|
|
||||||
|
|
||||||
DIST_DIR=GF-$(PACKAGE_VERSION)
|
|
||||||
NOT_IN_DIST= \
|
|
||||||
grammars \
|
|
||||||
download \
|
|
||||||
doc/release2.html \
|
|
||||||
src/tools/AlphaConvGF.hs
|
|
||||||
|
|
||||||
BIN_DIST_DIR=$(DIST_DIR)-$(host)
|
|
||||||
|
|
||||||
GRAMMAR_PACKAGE_VERSION=$(shell date +%Y%m%d)
|
|
||||||
GRAMMAR_DIST_DIR=gf-grammars-$(GRAMMAR_PACKAGE_VERSION)
|
|
||||||
|
|
||||||
MSI_FILE=gf-$(subst .,_,$(PACKAGE_VERSION)).msi
|
|
||||||
|
|
||||||
GF_DATA_DIR=$(datadir)/GF-$(PACKAGE_VERSION)
|
|
||||||
GF_LIB_DIR=$(GF_DATA_DIR)/lib
|
|
||||||
|
|
||||||
EMBED = GF/Embed/TemplateApp
|
|
||||||
|
|
||||||
# use the temporary binary file name 'gf-bin' to not clash with directory 'GF'
|
|
||||||
# on case insensitive file systems (such as FAT)
|
|
||||||
GF_EXE=gf$(EXEEXT)
|
|
||||||
GF_EXE_TMP=gf-bin$(EXEEXT)
|
|
||||||
GF_DOC_EXE=gfdoc$(EXEEXT)
|
|
||||||
|
|
||||||
|
|
||||||
ifeq ("$(READLINE)","readline")
|
|
||||||
GHCFLAGS += -package readline -DUSE_READLINE
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifneq ("$(CPPFLAGS)","")
|
|
||||||
GHCFLAGS += $(addprefix -optP, $(CPPFLAGS))
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifneq ("$(LDFLAGS)","")
|
|
||||||
GHCFLAGS += $(addprefix -optl, $(LDFLAGS))
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ("$(INTERRUPT)","yes")
|
|
||||||
GHCFLAGS += -DUSE_INTERRUPT
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ("$(ATK)","yes")
|
|
||||||
GHCFLAGS += -DUSE_ATK
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ("$(ENABLE_JAVA)", "yes")
|
|
||||||
BUILD_JAR=jar
|
|
||||||
else
|
|
||||||
BUILD_JAR=
|
|
||||||
endif
|
|
||||||
|
|
||||||
.PHONY: all unix jar tags gfdoc windows install install-gf \
|
|
||||||
lib temp install-gfdoc \
|
|
||||||
today help clean windows-msi dist gfc
|
|
||||||
|
|
||||||
all: unix gfc lib
|
|
||||||
|
|
||||||
static: GHCFLAGS += -optl-static
|
|
||||||
static: unix
|
|
||||||
|
|
||||||
|
|
||||||
gf: unix
|
|
||||||
|
|
||||||
unix: today opt
|
|
||||||
|
|
||||||
windows: unix
|
|
||||||
|
|
||||||
temp: today noopt
|
|
||||||
|
|
||||||
|
|
||||||
build:
|
|
||||||
$(GHMAKE) $(GHCFLAGS) GF.hs -o $(GF_EXE_TMP)
|
|
||||||
strip $(GF_EXE_TMP)
|
|
||||||
mv $(GF_EXE_TMP) ../bin/$(GF_EXE)
|
|
||||||
|
|
||||||
opt: GHCFLAGS += $(GHCOPTFLAGS)
|
|
||||||
opt: build
|
|
||||||
|
|
||||||
embed: GHCFLAGS += $(GHCOPTFLAGS)
|
|
||||||
embed:
|
|
||||||
$(GHMAKE) $(GHCFLAGS) $(EMBED) -o $(EMBED)
|
|
||||||
strip $(EMBED)
|
|
||||||
|
|
||||||
noopt: build
|
|
||||||
|
|
||||||
clean:
|
|
||||||
find . '(' -name '*~' -o -name '*.hi' -o -name '*.ghi' -o -name '*.o' ')' -exec rm -f '{}' ';'
|
|
||||||
-rm -f gf.wixobj
|
|
||||||
-rm -f ../bin/$(GF_EXE)
|
|
||||||
$(MAKE) -C tools/c clean
|
|
||||||
$(MAKE) -C ../lib/c clean
|
|
||||||
-rm -f ../bin/gfcc2c
|
|
||||||
|
|
||||||
distclean: clean
|
|
||||||
-rm -f tools/$(GF_DOC_EXE)
|
|
||||||
-rm -f config.status config.mk config.log
|
|
||||||
-rm -f *.tgz *.zip
|
|
||||||
-rm -rf $(DIST_DIR) $(BIN_DIST_DIR)
|
|
||||||
-rm -rf gf.wxs *.msi
|
|
||||||
|
|
||||||
today:
|
|
||||||
echo 'module Paths_gf (version, getDataDir) where' > Paths_gf.hs
|
|
||||||
echo 'import Data.Version' >> Paths_gf.hs
|
|
||||||
echo '{-# NOINLINE version #-}' >> Paths_gf.hs
|
|
||||||
echo 'version :: Version' >> Paths_gf.hs
|
|
||||||
echo 'version = Version {versionBranch = [3,0], versionTags = ["beta3"]}' >> Paths_gf.hs
|
|
||||||
echo 'getDataDir = return "$(GF_DATA_DIR)" :: IO FilePath' >> Paths_gf.hs
|
|
||||||
|
|
||||||
|
|
||||||
showflags:
|
|
||||||
@echo $(GHCFLAGS)
|
|
||||||
|
|
||||||
# added by peb:
|
|
||||||
tracing: GHCFLAGS += -DTRACING
|
|
||||||
tracing: temp
|
|
||||||
|
|
||||||
ghci-trace: GHCFLAGS += -DTRACING
|
|
||||||
ghci-trace: ghci
|
|
||||||
|
|
||||||
#touch-files:
|
|
||||||
# rm -f GF/System/Tracing.{hi,o}
|
|
||||||
# touch GF/System/Tracing.hs
|
|
||||||
|
|
||||||
# profiling
|
|
||||||
prof: GHCOPTFLAGS += -prof -auto-all
|
|
||||||
prof: unix
|
|
||||||
|
|
||||||
tags:
|
|
||||||
find GF Transfer -name '*.hs' | xargs hasktags
|
|
||||||
|
|
||||||
#
|
|
||||||
# Help file
|
|
||||||
#
|
|
||||||
|
|
||||||
tools/MkHelpFile: tools/MkHelpFile.hs
|
|
||||||
$(GHMAKE) -o $@ $^
|
|
||||||
|
|
||||||
help: GF/Shell/HelpFile.hs
|
|
||||||
|
|
||||||
GF/Shell/HelpFile.hs: tools/MkHelpFile HelpFile
|
|
||||||
tools/MkHelpFile
|
|
||||||
|
|
||||||
#
|
|
||||||
# Tools
|
|
||||||
#
|
|
||||||
|
|
||||||
gfdoc: tools/$(GF_DOC_EXE)
|
|
||||||
|
|
||||||
tools/$(GF_DOC_EXE): tools/GFDoc.hs
|
|
||||||
$(GHMAKE) $(GHCOPTFLAGS) -o $@ $^
|
|
||||||
|
|
||||||
gfc: gf
|
|
||||||
echo GFC!
|
|
||||||
cp -f gfc ../bin/
|
|
||||||
chmod a+x ../bin/gfc
|
|
||||||
|
|
||||||
gfcc2c:
|
|
||||||
$(MAKE) -C tools/c
|
|
||||||
$(MAKE) -C ../lib/c
|
|
||||||
mv tools/c/gfcc2c ../bin
|
|
||||||
|
|
||||||
#
|
|
||||||
# Resource grammars
|
|
||||||
#
|
|
||||||
|
|
||||||
lib:
|
|
||||||
$(MAKE) -C ../lib/resource clean all
|
|
||||||
|
|
||||||
#
|
|
||||||
# Distribution
|
|
||||||
#
|
|
||||||
|
|
||||||
dist:
|
|
||||||
-rm -rf $(DIST_DIR)
|
|
||||||
darcs dist --dist-name=$(DIST_DIR)
|
|
||||||
tar -zxf ../$(DIST_DIR).tar.gz
|
|
||||||
rm ../$(DIST_DIR).tar.gz
|
|
||||||
cd $(DIST_DIR)/src && perl -pi -e "s/^AC_INIT\(\[GF\],\[[^\]]*\]/AC_INIT([GF],[$(PACKAGE_VERSION)]/" configure.ac
|
|
||||||
cd $(DIST_DIR)/src && autoconf && rm -rf autom4te.cache
|
|
||||||
# cd $(DIST_DIR)/grammars && sh mkLib.sh
|
|
||||||
cd $(DIST_DIR) && rm -rf $(NOT_IN_DIST)
|
|
||||||
$(TAR) -zcf $(DIST_DIR).tgz $(DIST_DIR)
|
|
||||||
rm -rf $(DIST_DIR)
|
|
||||||
|
|
||||||
snapshot: PACKAGE_VERSION=$(shell date +%Y%m%d)
|
|
||||||
snapshot: DIST_DIR=GF-$(PACKAGE_VERSION)
|
|
||||||
snapshot: dist
|
|
||||||
|
|
||||||
rpm: dist
|
|
||||||
rpmbuild -ta $(DIST_DIR).tgz
|
|
||||||
|
|
||||||
|
|
||||||
binary-dist:
|
|
||||||
rm -rf $(BIN_DIST_DIR)
|
|
||||||
mkdir $(BIN_DIST_DIR)
|
|
||||||
mkdir $(BIN_DIST_DIR)/lib
|
|
||||||
./configure --host="$(host)" --build="$(build)"
|
|
||||||
$(MAKE) gfc gfdoc
|
|
||||||
$(INSTALL) ../bin/$(GF_EXE) tools/$(GF_DOC_EXE) $(BIN_DIST_DIR)
|
|
||||||
$(INSTALL) configure config.guess config.sub install-sh config.mk.in $(BIN_DIST_DIR)
|
|
||||||
$(INSTALL) gfc.in $(BIN_DIST_DIR)
|
|
||||||
$(INSTALL) -m 0644 ../README ../LICENSE $(BIN_DIST_DIR)
|
|
||||||
$(INSTALL) -m 0644 INSTALL.binary $(BIN_DIST_DIR)/INSTALL
|
|
||||||
$(INSTALL) -m 0644 Makefile.binary $(BIN_DIST_DIR)/Makefile
|
|
||||||
# $(TAR) -C $(BIN_DIST_DIR)/lib -zxf ../lib/compiled.tgz
|
|
||||||
$(TAR) -zcf GF-$(PACKAGE_VERSION)-$(host).tgz $(BIN_DIST_DIR)
|
|
||||||
rm -rf $(BIN_DIST_DIR)
|
|
||||||
|
|
||||||
grammar-dist:
|
|
||||||
-rm -rf $(GRAMMAR_DIST_DIR)
|
|
||||||
mkdir $(GRAMMAR_DIST_DIR)
|
|
||||||
cp -r ../_darcs/current/{lib,examples} $(GRAMMAR_DIST_DIR)
|
|
||||||
$(MAKE) GF_LIB_PATH=.. -C $(GRAMMAR_DIST_DIR)/lib/resource-1.0 show-path prelude present alltenses mathematical api multimodal langs
|
|
||||||
$(TAR) -zcf $(GRAMMAR_DIST_DIR).tgz $(GRAMMAR_DIST_DIR)
|
|
||||||
rm -rf $(GRAMMAR_DIST_DIR)
|
|
||||||
|
|
||||||
gf.wxs: config.status gf.wxs.in
|
|
||||||
./config.status --file=$@
|
|
||||||
|
|
||||||
windows-msi: gf.wxs
|
|
||||||
candle -nologo gf.wxs
|
|
||||||
light -nologo -o $(MSI_FILE) gf.wixobj
|
|
||||||
|
|
||||||
#
|
|
||||||
# Installation
|
|
||||||
#
|
|
||||||
|
|
||||||
install: install-gf install-gfdoc install-lib
|
|
||||||
|
|
||||||
install-gf:
|
|
||||||
$(INSTALL) -d $(bindir)
|
|
||||||
$(INSTALL) ../bin/$(GF_EXE) $(bindir)
|
|
||||||
|
|
||||||
install-gfdoc:
|
|
||||||
$(INSTALL) -d $(bindir)
|
|
||||||
$(INSTALL) tools/$(GF_DOC_EXE) $(bindir)
|
|
||||||
|
|
||||||
install-lib:
|
|
||||||
$(INSTALL) -d $(GF_LIB_DIR)
|
|
||||||
$(TAR) -C $(GF_LIB_DIR) -zxf ../lib/compiled.tgz
|
|
||||||
@@ -1,20 +0,0 @@
|
|||||||
include config.mk
|
|
||||||
|
|
||||||
GF_DATA_DIR=$(datadir)/GF-$(PACKAGE_VERSION)
|
|
||||||
GF_LIB_DIR=$(GF_DATA_DIR)/lib
|
|
||||||
|
|
||||||
.PHONY: install uninstall
|
|
||||||
|
|
||||||
install:
|
|
||||||
$(INSTALL) -d $(bindir)
|
|
||||||
$(INSTALL) gf$(EXEEXT) gfdoc$(EXEEXT) $(bindir)
|
|
||||||
$(INSTALL) gfc$(EXEEXT) $(bindir)
|
|
||||||
$(INSTALL) -d $(GF_DATA_DIR)
|
|
||||||
cp -r lib $(GF_DATA_DIR)
|
|
||||||
|
|
||||||
uninstall:
|
|
||||||
# Remove every executable that the install target copies into $(bindir).
-rm -f $(bindir)/gf$(EXEEXT) $(bindir)/gfdoc$(EXEEXT) $(bindir)/gfc$(EXEEXT)
|
|
||||||
# Remove compiled grammar objects so the rmdir steps below find empty directories.
-rm -f $(GF_LIB_DIR)/*/*.gfo
|
|
||||||
-rmdir $(GF_LIB_DIR)/*
|
|
||||||
-rmdir $(GF_LIB_DIR)
|
|
||||||
-rmdir $(GF_DATA_DIR)
|
|
||||||
@@ -1,13 +0,0 @@
|
|||||||
concrete Eng of Ex = {
|
|
||||||
lincat
|
|
||||||
S = {s : Str} ;
|
|
||||||
NP = {s : Str ; n : Num} ;
|
|
||||||
VP = {s : Num => Str} ;
|
|
||||||
param
|
|
||||||
Num = Sg | Pl ;
|
|
||||||
lin
|
|
||||||
Pred np vp = {s = np.s ++ vp.s ! np.n} ;
|
|
||||||
She = {s = "she" ; n = Sg} ;
|
|
||||||
They = {s = "they" ; n = Pl} ;
|
|
||||||
Sleep = {s = table {Sg => "sleeps" ; Pl => "sleep"}} ;
|
|
||||||
}
|
|
||||||
@@ -1,8 +0,0 @@
|
|||||||
abstract Ex = {
|
|
||||||
cat
|
|
||||||
S ; NP ; VP ;
|
|
||||||
fun
|
|
||||||
Pred : NP -> VP -> S ;
|
|
||||||
She, They : NP ;
|
|
||||||
Sleep : VP ;
|
|
||||||
}
|
|
||||||
@@ -1,13 +0,0 @@
|
|||||||
concrete Swe of Ex = {
|
|
||||||
lincat
|
|
||||||
S = {s : Str} ;
|
|
||||||
NP = {s : Str} ;
|
|
||||||
VP = {s : Str} ;
|
|
||||||
param
|
|
||||||
Num = Sg | Pl ;
|
|
||||||
lin
|
|
||||||
Pred np vp = {s = np.s ++ vp.s} ;
|
|
||||||
She = {s = "hon"} ;
|
|
||||||
They = {s = "de"} ;
|
|
||||||
Sleep = {s = "sover"} ;
|
|
||||||
}
|
|
||||||
@@ -1,64 +0,0 @@
|
|||||||
-- to test GFCC compilation
|
|
||||||
|
|
||||||
flags coding=utf8 ;
|
|
||||||
|
|
||||||
cat S ; NP ; N ; VP ;
|
|
||||||
|
|
||||||
fun Pred : NP -> VP -> S ;
|
|
||||||
fun Pred2 : NP -> VP -> NP -> S ;
|
|
||||||
fun Det, Dets : N -> NP ;
|
|
||||||
fun Mina, Sina, Me, Te : NP ;
|
|
||||||
fun Raha, Paska, Pallo : N ;
|
|
||||||
fun Puhua, Munia, Sanoa : VP ;
|
|
||||||
|
|
||||||
param Person = P1 | P2 | P3 ;
|
|
||||||
param Number = Sg | Pl ;
|
|
||||||
param Case = Nom | Part ;
|
|
||||||
|
|
||||||
param NForm = NF Number Case ;
|
|
||||||
param VForm = VF Number Person ;
|
|
||||||
|
|
||||||
lincat N = Noun ;
|
|
||||||
lincat VP = Verb ;
|
|
||||||
|
|
||||||
oper Noun = {s : NForm => Str} ;
|
|
||||||
oper Verb = {s : VForm => Str} ;
|
|
||||||
|
|
||||||
lincat NP = {s : Case => Str ; a : {n : Number ; p : Person}} ;
|
|
||||||
|
|
||||||
lin Pred np vp = {s = np.s ! Nom ++ vp.s ! VF np.a.n np.a.p} ;
|
|
||||||
lin Pred2 np vp ob = {s = np.s ! Nom ++ vp.s ! VF np.a.n np.a.p ++ ob.s ! Part} ;
|
|
||||||
lin Det no = {s = \\c => no.s ! NF Sg c ; a = {n = Sg ; p = P3}} ;
|
|
||||||
lin Dets no = {s = \\c => no.s ! NF Pl c ; a = {n = Pl ; p = P3}} ;
|
|
||||||
lin Mina = {s = table Case ["minä" ; "minua"] ; a = {n = Sg ; p = P1}} ;
|
|
||||||
lin Te = {s = table Case ["te" ; "teitä"] ; a = {n = Pl ; p = P2}} ;
|
|
||||||
lin Sina = {s = table Case ["sinä" ; "sinua"] ; a = {n = Sg ; p = P2}} ;
|
|
||||||
lin Me = {s = table Case ["me" ; "meitä"] ; a = {n = Pl ; p = P1}} ;
|
|
||||||
|
|
||||||
lin Raha = mkN "raha" ;
|
|
||||||
lin Paska = mkN "paska" ;
|
|
||||||
lin Pallo = mkN "pallo" ;
|
|
||||||
lin Puhua = mkV "puhu" ;
|
|
||||||
lin Munia = mkV "muni" ;
|
|
||||||
lin Sanoa = mkV "sano" ;
|
|
||||||
|
|
||||||
oper mkN : Str -> Noun = \raha -> {
|
|
||||||
s = table {
|
|
||||||
NF Sg Nom => raha ;
|
|
||||||
NF Sg Part => raha + "a" ;
|
|
||||||
NF Pl Nom => raha + "t" ;
|
|
||||||
NF Pl Part => Predef.tk 1 raha + "oja"
|
|
||||||
}
|
|
||||||
} ;
|
|
||||||
|
|
||||||
oper mkV : Str -> Verb = \puhu -> {
|
|
||||||
s = table {
|
|
||||||
VF Sg P1 => puhu + "n" ;
|
|
||||||
VF Sg P2 => puhu + "t" ;
|
|
||||||
VF Sg P3 => puhu + Predef.dp 1 puhu ;
|
|
||||||
VF Pl P1 => puhu + "mme" ;
|
|
||||||
VF Pl P2 => puhu + "tte" ;
|
|
||||||
VF Pl P3 => puhu + "vat"
|
|
||||||
}
|
|
||||||
} ;
|
|
||||||
|
|
||||||
@@ -1,809 +0,0 @@
|
|||||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
|
||||||
<HTML>
|
|
||||||
<HEAD>
|
|
||||||
<META NAME="generator" CONTENT="http://txt2tags.sf.net">
|
|
||||||
<TITLE>The GFCC Grammar Format</TITLE>
|
|
||||||
</HEAD><BODY BGCOLOR="white" TEXT="black">
|
|
||||||
<P ALIGN="center"><CENTER><H1>The GFCC Grammar Format</H1>
|
|
||||||
<FONT SIZE="4">
|
|
||||||
<I>Aarne Ranta</I><BR>
|
|
||||||
October 5, 2007
|
|
||||||
</FONT></CENTER>
|
|
||||||
|
|
||||||
<P>
|
|
||||||
Author's address:
|
|
||||||
<A HREF="http://www.cs.chalmers.se/~aarne"><CODE>http://www.cs.chalmers.se/~aarne</CODE></A>
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
History:
|
|
||||||
</P>
|
|
||||||
<UL>
|
|
||||||
<LI>5 Oct 2007: new, better structured GFCC with full expressive power
|
|
||||||
<LI>19 Oct: translation of lincats, new figures on C++
|
|
||||||
<LI>3 Oct 2006: first version
|
|
||||||
</UL>
|
|
||||||
|
|
||||||
<H2>What is GFCC</H2>
|
|
||||||
<P>
|
|
||||||
GFCC is a low-level format for GF grammars. Its aim is to contain the minimum
|
|
||||||
that is needed to process GF grammars at runtime. This minimality has three
|
|
||||||
advantages:
|
|
||||||
</P>
|
|
||||||
<UL>
|
|
||||||
<LI>compact grammar files and run-time objects
|
|
||||||
<LI>time and space efficient processing
|
|
||||||
<LI>simple definition of interpreters
|
|
||||||
</UL>
|
|
||||||
|
|
||||||
<P>
|
|
||||||
Thus we also want to call GFCC the <B>portable grammar format</B>.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
The idea is that all embedded GF applications use GFCC.
|
|
||||||
The GF system would be primarily used as a compiler and as a grammar
|
|
||||||
development tool.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
Since GFCC is implemented in BNFC, a parser of the format is readily
|
|
||||||
available for C, C++, C#, Haskell, Java, and OCaml. Also an XML
|
|
||||||
representation can be generated in BNFC. A
|
|
||||||
<A HREF="../">reference implementation</A>
|
|
||||||
of linearization and some other functions has been written in Haskell.
|
|
||||||
</P>
|
|
||||||
<H2>GFCC vs. GFC</H2>
|
|
||||||
<P>
|
|
||||||
GFCC is intended to replace GFC as the run-time grammar format. GFC was designed
|
|
||||||
to be a run-time format, but also to
|
|
||||||
support separate compilation of grammars, i.e.
|
|
||||||
to store the results of compiling
|
|
||||||
individual GF modules. But this means that GFC has to contain extra information,
|
|
||||||
such as type annotations, which is only needed in compilation and not at
|
|
||||||
run-time. In particular, the pattern matching syntax and semantics of GFC is
|
|
||||||
complex and therefore difficult to implement in new platforms.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
Actually, GFC is planned to be omitted also as the target format of
|
|
||||||
separate compilation, where plain GF (type annotated and partially evaluated)
|
|
||||||
will be used instead. GFC provides only marginal advantages as a target format
|
|
||||||
compared with GF, and it is therefore just extra weight to carry around this
|
|
||||||
format.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
The main differences of GFCC compared with GFC (and GF) can be summarized as follows:
|
|
||||||
</P>
|
|
||||||
<UL>
|
|
||||||
<LI>there are no modules, and therefore no qualified names
|
|
||||||
<LI>a GFCC grammar is multilingual, and consists of a common abstract syntax
|
|
||||||
together with one concrete syntax per language
|
|
||||||
<LI>records and tables are replaced by arrays
|
|
||||||
<LI>record labels and parameter values are replaced by integers
|
|
||||||
<LI>record projection and table selection are replaced by array indexing
|
|
||||||
<LI>even though the format does support dependent types and higher-order abstract
|
|
||||||
syntax, there is no interpreter yet that does this
|
|
||||||
</UL>
|
|
||||||
|
|
||||||
<P>
|
|
||||||
Here is an example of a GF grammar, consisting of three modules,
|
|
||||||
as translated to GFCC. The representations are aligned; thus they do not completely
|
|
||||||
reflect the order of judgements in GFCC files, which have different orders of
|
|
||||||
blocks of judgements, and alphabetical sorting.
|
|
||||||
</P>
|
|
||||||
<PRE>
|
|
||||||
grammar Ex(Eng,Swe);
|
|
||||||
|
|
||||||
abstract Ex = { abstract {
|
|
||||||
cat cat
|
|
||||||
S ; NP ; VP ; NP[]; S[]; VP[];
|
|
||||||
fun fun
|
|
||||||
Pred : NP -> VP -> S ; Pred=[(($ 0! 1),(($ 1! 0)!($ 0! 0)))];
|
|
||||||
She, They : NP ; She=[0,"she"];
|
|
||||||
Sleep : VP ; They=[1,"they"];
|
|
||||||
Sleep=[["sleeps","sleep"]];
|
|
||||||
} } ;
|
|
||||||
|
|
||||||
concrete Eng of Ex = { concrete Eng {
|
|
||||||
lincat lincat
|
|
||||||
S = {s : Str} ; S=[()];
|
|
||||||
NP = {s : Str ; n : Num} ; NP=[1,()];
|
|
||||||
VP = {s : Num => Str} ; VP=[[(),()]];
|
|
||||||
param
|
|
||||||
Num = Sg | Pl ;
|
|
||||||
lin lin
|
|
||||||
Pred np vp = { Pred=[(($ 0! 1),(($ 1! 0)!($ 0! 0)))];
|
|
||||||
s = np.s ++ vp.s ! np.n} ;
|
|
||||||
She = {s = "she" ; n = Sg} ; She=[0,"she"];
|
|
||||||
They = {s = "they" ; n = Pl} ; They = [1, "they"];
|
|
||||||
Sleep = {s = table { Sleep=[["sleeps","sleep"]];
|
|
||||||
Sg => "sleeps" ;
|
|
||||||
Pl => "sleep"
|
|
||||||
}
|
|
||||||
} ;
|
|
||||||
} } ;
|
|
||||||
|
|
||||||
concrete Swe of Ex = { concrete Swe {
|
|
||||||
lincat lincat
|
|
||||||
S = {s : Str} ; S=[()];
|
|
||||||
NP = {s : Str} ; NP=[()];
|
|
||||||
VP = {s : Str} ; VP=[()];
|
|
||||||
param
|
|
||||||
Num = Sg | Pl ;
|
|
||||||
lin lin
|
|
||||||
Pred np vp = { Pred = [(($0!0),($1!0))];
|
|
||||||
s = np.s ++ vp.s} ;
|
|
||||||
She = {s = "hon"} ; She = ["hon"];
|
|
||||||
They = {s = "de"} ; They = ["de"];
|
|
||||||
Sleep = {s = "sover"} ; Sleep = ["sover"];
|
|
||||||
} } ;
|
|
||||||
</PRE>
|
|
||||||
<P></P>
|
|
||||||
<H2>The syntax of GFCC files</H2>
|
|
||||||
<P>
|
|
||||||
The complete BNFC grammar, from which
|
|
||||||
the rules in this section are taken, is in the file
|
|
||||||
<A HREF="../GFCC.cf"><CODE>GF/GFCC/GFCC.cf</CODE></A>.
|
|
||||||
</P>
|
|
||||||
<H3>Top level</H3>
|
|
||||||
<P>
|
|
||||||
A grammar has a header telling the name of the abstract syntax
|
|
||||||
(often specifying an application domain), and the names of
|
|
||||||
the concrete languages. The abstract syntax and the concrete
|
|
||||||
syntaxes themselves follow.
|
|
||||||
</P>
|
|
||||||
<PRE>
|
|
||||||
Grm. Grammar ::=
|
|
||||||
"grammar" CId "(" [CId] ")" ";"
|
|
||||||
Abstract ";"
|
|
||||||
[Concrete] ;
|
|
||||||
|
|
||||||
Abs. Abstract ::=
|
|
||||||
"abstract" "{"
|
|
||||||
"flags" [Flag]
|
|
||||||
"fun" [FunDef]
|
|
||||||
"cat" [CatDef]
|
|
||||||
"}" ;
|
|
||||||
|
|
||||||
Cnc. Concrete ::=
|
|
||||||
"concrete" CId "{"
|
|
||||||
"flags" [Flag]
|
|
||||||
"lin" [LinDef]
|
|
||||||
"oper" [LinDef]
|
|
||||||
"lincat" [LinDef]
|
|
||||||
"lindef" [LinDef]
|
|
||||||
"printname" [LinDef]
|
|
||||||
"}" ;
|
|
||||||
</PRE>
|
|
||||||
<P>
|
|
||||||
This syntax organizes each module into a sequence of <B>fields</B>, such
|
|
||||||
as flags, linearizations, operations, linearization types, etc.
|
|
||||||
It is envisaged that particular applications can ignore some
|
|
||||||
of the fields, typically so that earlier fields are more
|
|
||||||
important than later ones.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
The judgement forms have the following syntax.
|
|
||||||
</P>
|
|
||||||
<PRE>
|
|
||||||
Flg. Flag ::= CId "=" String ;
|
|
||||||
Cat. CatDef ::= CId "[" [Hypo] "]" ;
|
|
||||||
Fun. FunDef ::= CId ":" Type "=" Exp ;
|
|
||||||
Lin. LinDef ::= CId "=" Term ;
|
|
||||||
</PRE>
|
|
||||||
<P>
|
|
||||||
For the run-time system, the reference implementation in Haskell
|
|
||||||
uses a structure that gives efficient look-up:
|
|
||||||
</P>
|
|
||||||
<PRE>
|
|
||||||
data GFCC = GFCC {
|
|
||||||
absname :: CId ,
|
|
||||||
cncnames :: [CId] ,
|
|
||||||
abstract :: Abstr ,
|
|
||||||
concretes :: Map CId Concr
|
|
||||||
}
|
|
||||||
|
|
||||||
data Abstr = Abstr {
|
|
||||||
aflags :: Map CId String, -- value of a flag
|
|
||||||
funs :: Map CId (Type,Exp), -- type and def of a fun
|
|
||||||
cats :: Map CId [Hypo], -- context of a cat
|
|
||||||
catfuns :: Map CId [CId] -- funs yielding a cat (redundant, for fast lookup)
|
|
||||||
}
|
|
||||||
|
|
||||||
data Concr = Concr {
|
|
||||||
flags :: Map CId String, -- value of a flag
|
|
||||||
lins :: Map CId Term, -- lin of a fun
|
|
||||||
opers :: Map CId Term, -- oper generated by subex elim
|
|
||||||
lincats :: Map CId Term, -- lin type of a cat
|
|
||||||
lindefs :: Map CId Term, -- lin default of a cat
|
|
||||||
printnames :: Map CId Term -- printname of a cat or a fun
|
|
||||||
}
|
|
||||||
</PRE>
|
|
||||||
<P>
|
|
||||||
These definitions are from <A HREF="../DataGFCC.hs"><CODE>GF/GFCC/DataGFCC.hs</CODE></A>.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
Identifiers (<CODE>CId</CODE>) are like <CODE>Ident</CODE> in GF, except that
|
|
||||||
the compiler produces constants prefixed with <CODE>_</CODE> in
|
|
||||||
the common subterm elimination optimization.
|
|
||||||
</P>
|
|
||||||
<PRE>
|
|
||||||
token CId (('_' | letter) (letter | digit | '\'' | '_')*) ;
|
|
||||||
</PRE>
|
|
||||||
<P></P>
|
|
||||||
<H3>Abstract syntax</H3>
|
|
||||||
<P>
|
|
||||||
Types are first-order function types built from argument type
|
|
||||||
contexts and value types.
|
|
||||||
A value type is a category symbol applied to expressions. Syntax trees (<CODE>Exp</CODE>) are
|
|
||||||
rose trees with nodes consisting of a head (<CODE>Atom</CODE>) and
|
|
||||||
bound variables (<CODE>CId</CODE>).
|
|
||||||
</P>
|
|
||||||
<PRE>
|
|
||||||
DTyp. Type ::= "[" [Hypo] "]" CId [Exp] ;
|
|
||||||
DTr. Exp ::= "[" "(" [CId] ")" Atom [Exp] "]" ;
|
|
||||||
Hyp. Hypo ::= CId ":" Type ;
|
|
||||||
</PRE>
|
|
||||||
<P>
|
|
||||||
The head Atom is either a function
|
|
||||||
constant, a bound variable, or a metavariable, or a string, integer, or float
|
|
||||||
literal.
|
|
||||||
</P>
|
|
||||||
<PRE>
|
|
||||||
AC. Atom ::= CId ;
|
|
||||||
AS. Atom ::= String ;
|
|
||||||
AI. Atom ::= Integer ;
|
|
||||||
AF. Atom ::= Double ;
|
|
||||||
AM. Atom ::= "?" Integer ;
|
|
||||||
</PRE>
|
|
||||||
<P>
|
|
||||||
The context-free types and trees of the "old GFCC" are special
|
|
||||||
cases, which can be defined as follows:
|
|
||||||
</P>
|
|
||||||
<PRE>
|
|
||||||
Typ. Type ::= [CId] "->" CId
|
|
||||||
Typ args val = DTyp [Hyp (CId "_") arg | arg <- args] val
|
|
||||||
|
|
||||||
Tr. Exp ::= "(" CId [Exp] ")"
|
|
||||||
Tr fun exps = DTr [] fun exps
|
|
||||||
</PRE>
|
|
||||||
<P>
|
|
||||||
To store semantic (<CODE>def</CODE>) definitions by cases, the following expression
|
|
||||||
form is provided, but it is only meaningful in the last field of a function
|
|
||||||
declaration in an abstract syntax:
|
|
||||||
</P>
|
|
||||||
<PRE>
|
|
||||||
EEq. Exp ::= "{" [Equation] "}" ;
|
|
||||||
Equ. Equation ::= [Exp] "->" Exp ;
|
|
||||||
</PRE>
|
|
||||||
<P>
|
|
||||||
Notice that expressions are used to encode patterns. Primitive notions
|
|
||||||
(the default semantics in GF) are encoded as empty sets of equations
|
|
||||||
(<CODE>[]</CODE>). For a constructor (canonical form) of a category <CODE>C</CODE>, we
|
|
||||||
aim to use the encoding as the application <CODE>(_constr C)</CODE>.
|
|
||||||
</P>
|
|
||||||
<H3>Concrete syntax</H3>
|
|
||||||
<P>
|
|
||||||
Linearization terms (<CODE>Term</CODE>) are built as follows.
|
|
||||||
Constructor names are shown to make the later code
|
|
||||||
examples readable.
|
|
||||||
</P>
|
|
||||||
<PRE>
|
|
||||||
R. Term ::= "[" [Term] "]" ; -- array (record/table)
|
|
||||||
P. Term ::= "(" Term "!" Term ")" ; -- access to field (projection/selection)
|
|
||||||
S. Term ::= "(" [Term] ")" ; -- concatenated sequence
|
|
||||||
K. Term ::= Tokn ; -- token
|
|
||||||
V. Term ::= "$" Integer ; -- argument (subtree)
|
|
||||||
C. Term ::= Integer ; -- array index (label/parameter value)
|
|
||||||
FV. Term ::= "[|" [Term] "|]" ; -- free variation
|
|
||||||
TM. Term ::= "?" ; -- linearization of metavariable
|
|
||||||
</PRE>
|
|
||||||
<P>
|
|
||||||
Tokens are strings or (maybe obsolescent) prefix-dependent
|
|
||||||
variant lists.
|
|
||||||
</P>
|
|
||||||
<PRE>
|
|
||||||
KS. Tokn ::= String ;
|
|
||||||
KP. Tokn ::= "[" "pre" [String] "[" [Variant] "]" "]" ;
|
|
||||||
Var. Variant ::= [String] "/" [String] ;
|
|
||||||
</PRE>
|
|
||||||
<P>
|
|
||||||
Two special forms of terms are introduced by the compiler
|
|
||||||
as optimizations. They can in principle be eliminated, but
|
|
||||||
their presence makes grammars much more compact. Their semantics
|
|
||||||
will be explained in a later section.
|
|
||||||
</P>
|
|
||||||
<PRE>
|
|
||||||
F. Term ::= CId ; -- global constant
|
|
||||||
W. Term ::= "(" String "+" Term ")" ; -- prefix + suffix table
|
|
||||||
</PRE>
|
|
||||||
<P>
|
|
||||||
There is also a deprecated form of "record parameter alias",
|
|
||||||
</P>
|
|
||||||
<PRE>
|
|
||||||
RP. Term ::= "(" Term "@" Term ")"; -- DEPRECATED
|
|
||||||
</PRE>
|
|
||||||
<P>
|
|
||||||
which will be removed when the migration to new GFCC is complete.
|
|
||||||
</P>
|
|
||||||
<H2>The semantics of concrete syntax terms</H2>
|
|
||||||
<P>
|
|
||||||
The code in this section is from <A HREF="../Linearize.hs"><CODE>GF/GFCC/Linearize.hs</CODE></A>.
|
|
||||||
</P>
|
|
||||||
<H3>Linearization and realization</H3>
|
|
||||||
<P>
|
|
||||||
The linearization algorithm is essentially the same as in
|
|
||||||
GFC: a tree is linearized by evaluating its linearization term
|
|
||||||
in the environment of the linearizations of the subtrees.
|
|
||||||
Literal atoms are linearized in the obvious way.
|
|
||||||
The function also needs to know the language (i.e. concrete syntax)
|
|
||||||
in which linearization is performed.
|
|
||||||
</P>
|
|
||||||
<PRE>
|
|
||||||
linExp :: GFCC -> CId -> Exp -> Term
|
|
||||||
linExp gfcc lang tree@(DTr _ at trees) = case at of
|
|
||||||
AC fun -> comp (Prelude.map lin trees) $ look fun
|
|
||||||
AS s -> R [kks (show s)] -- quoted
|
|
||||||
AI i -> R [kks (show i)]
|
|
||||||
AF d -> R [kks (show d)]
|
|
||||||
AM _ -> TM
|
|
||||||
where
|
|
||||||
lin = linExp gfcc lang
|
|
||||||
comp = compute gfcc lang
|
|
||||||
look = lookLin gfcc lang
|
|
||||||
</PRE>
|
|
||||||
<P>
|
|
||||||
TODO: bindings must be supported.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
The result of linearization is usually a record, which is realized as
|
|
||||||
a string using the following algorithm.
|
|
||||||
</P>
|
|
||||||
<PRE>
|
|
||||||
realize :: Term -> String
|
|
||||||
realize trm = case trm of
|
|
||||||
R (t:_) -> realize t
|
|
||||||
S ss -> unwords $ Prelude.map realize ss
|
|
||||||
K (KS s) -> s
|
|
||||||
K (KP s _) -> unwords s ---- prefix choice TODO
|
|
||||||
W s t -> s ++ realize t
|
|
||||||
FV (t:_) -> realize t
|
|
||||||
TM -> "?"
|
|
||||||
</PRE>
|
|
||||||
<P>
|
|
||||||
Notice that realization always picks the first field of a record.
|
|
||||||
If a linearization type has more than one field, the first field
|
|
||||||
does not necessarily contain the desired string.
|
|
||||||
Also notice that the order of record fields in GFCC is not necessarily
|
|
||||||
the same as in GF source.
|
|
||||||
</P>
|
|
||||||
<H3>Term evaluation</H3>
|
|
||||||
<P>
|
|
||||||
Evaluation follows call-by-value order, with two environments
|
|
||||||
needed:
|
|
||||||
</P>
|
|
||||||
<UL>
|
|
||||||
<LI>the grammar (a concrete syntax) to give the global constants
|
|
||||||
<LI>an array of terms to give the subtree linearizations
|
|
||||||
</UL>
|
|
||||||
|
|
||||||
<P>
|
|
||||||
The code is presented in one-level pattern matching, to
|
|
||||||
enable reimplementations in languages that do not permit
|
|
||||||
deep patterns (such as Java and C++).
|
|
||||||
</P>
|
|
||||||
<PRE>
|
|
||||||
compute :: GFCC -> CId -> [Term] -> Term -> Term
|
|
||||||
compute gfcc lang args = comp where
|
|
||||||
comp trm = case trm of
|
|
||||||
P r p -> proj (comp r) (comp p)
|
|
||||||
W s t -> W s (comp t)
|
|
||||||
R ts -> R $ Prelude.map comp ts
|
|
||||||
V i -> idx args (fromInteger i) -- already computed
|
|
||||||
F c -> comp $ look c -- not computed (if contains V)
|
|
||||||
FV ts -> FV $ Prelude.map comp ts
|
|
||||||
S ts -> S $ Prelude.filter (/= S []) $ Prelude.map comp ts
|
|
||||||
_ -> trm
|
|
||||||
|
|
||||||
look = lookOper gfcc lang
|
|
||||||
|
|
||||||
idx xs i = xs !! i
|
|
||||||
|
|
||||||
proj r p = case (r,p) of
|
|
||||||
(_, FV ts) -> FV $ Prelude.map (proj r) ts
|
|
||||||
(W s t, _) -> kks (s ++ getString (proj t p))
|
|
||||||
_ -> comp $ getField r (getIndex p)
|
|
||||||
|
|
||||||
getString t = case t of
|
|
||||||
K (KS s) -> s
|
|
||||||
_ -> trace ("ERROR in grammar compiler: string from "++ show t) "ERR"
|
|
||||||
|
|
||||||
getIndex t = case t of
|
|
||||||
C i -> fromInteger i
|
|
||||||
RP p _ -> getIndex p
|
|
||||||
TM -> 0 -- default value for parameter
|
|
||||||
_ -> trace ("ERROR in grammar compiler: index from " ++ show t) 0
|
|
||||||
|
|
||||||
getField t i = case t of
|
|
||||||
R rs -> idx rs i
|
|
||||||
RP _ r -> getField r i
|
|
||||||
TM -> TM
|
|
||||||
_ -> trace ("ERROR in grammar compiler: field from " ++ show t) t
|
|
||||||
</PRE>
|
|
||||||
<P></P>
|
|
||||||
<H3>The special term constructors</H3>
|
|
||||||
<P>
|
|
||||||
The two forms introduced by the compiler may need a special
|
|
||||||
explanation.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
Global constants
|
|
||||||
</P>
|
|
||||||
<PRE>
|
|
||||||
Term ::= CId ;
|
|
||||||
</PRE>
|
|
||||||
<P>
|
|
||||||
are shorthands for complex terms. They are produced by the
|
|
||||||
compiler by (iterated) <B>common subexpression elimination</B>.
|
|
||||||
They are often more powerful than hand-devised code sharing in the source
|
|
||||||
code. They could be computed off-line by replacing each identifier by
|
|
||||||
its definition.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
<B>Prefix-suffix tables</B>
|
|
||||||
</P>
|
|
||||||
<PRE>
|
|
||||||
Term ::= "(" String "+" Term ")" ;
|
|
||||||
</PRE>
|
|
||||||
<P>
|
|
||||||
represent tables of word forms divided into the longest common prefix
|
|
||||||
and its array of suffixes. In the example grammar above, we have
|
|
||||||
</P>
|
|
||||||
<PRE>
|
|
||||||
Sleep = [("sleep" + ["s",""])]
|
|
||||||
</PRE>
|
|
||||||
<P>
|
|
||||||
which in fact is equal to the array of full forms
|
|
||||||
</P>
|
|
||||||
<PRE>
|
|
||||||
["sleeps", "sleep"]
|
|
||||||
</PRE>
|
|
||||||
<P>
|
|
||||||
The power of this construction comes from the fact that suffix sets
|
|
||||||
tend to be repeated in a language, and can therefore be collected
|
|
||||||
by common subexpression elimination. It is this technique that
|
|
||||||
explains the used syntax rather than the more accurate
|
|
||||||
</P>
|
|
||||||
<PRE>
|
|
||||||
"(" String "+" [String] ")"
|
|
||||||
</PRE>
|
|
||||||
<P>
|
|
||||||
since we want the suffix part to be a <CODE>Term</CODE> for the optimization to
|
|
||||||
take effect.
|
|
||||||
</P>
|
|
||||||
<H2>Compiling to GFCC</H2>
|
|
||||||
<P>
|
|
||||||
Compilation to GFCC is performed by the GF grammar compiler, and
|
|
||||||
GFCC interpreters need not know what it does. For grammar writers,
|
|
||||||
however, it might be interesting to know what happens to the grammars
|
|
||||||
in the process.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
The compilation phases are the following
|
|
||||||
</P>
|
|
||||||
<OL>
|
|
||||||
<LI>type check and partially evaluate GF source
|
|
||||||
<LI>create a symbol table mapping the GF parameter and record types to
|
|
||||||
fixed-size arrays, and parameter values and record labels to integers
|
|
||||||
<LI>traverse the linearization rules replacing parameters and labels by integers
|
|
||||||
<LI>reorganize the created GF grammar so that it has just one abstract syntax
|
|
||||||
and one concrete syntax per language
|
|
||||||
<LI>TODO: apply UTF8 encoding to the grammar, if not yet applied (this is told by the
|
|
||||||
<CODE>coding</CODE> flag)
|
|
||||||
<LI>translate the GF grammar object to a GFCC grammar object, using a simple
|
|
||||||
compositional mapping
|
|
||||||
<LI>perform the word-suffix optimization on GFCC linearization terms
|
|
||||||
<LI>perform subexpression elimination on each concrete syntax module
|
|
||||||
<LI>print out the GFCC code
|
|
||||||
</OL>
|
|
||||||
|
|
||||||
<H3>Problems in GFCC compilation</H3>
|
|
||||||
<P>
|
|
||||||
Two major problems had to be solved in compiling GF to GFCC:
|
|
||||||
</P>
|
|
||||||
<UL>
|
|
||||||
<LI>consistent order of tables and records, to permit the array translation
|
|
||||||
<LI>run-time variables in complex parameter values.
|
|
||||||
</UL>
|
|
||||||
|
|
||||||
<P>
|
|
||||||
The current implementation is still experimental and may fail
|
|
||||||
to generate correct code. Any errors remaining are likely to be
|
|
||||||
related to the two problems just mentioned.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
The order problem is solved in slightly different ways for tables and records.
|
|
||||||
In both cases, <B>eta expansion</B> is used to establish a
|
|
||||||
canonical order. Tables are ordered by applying the preorder induced
|
|
||||||
by <CODE>param</CODE> definitions. Records are ordered by sorting them by labels.
|
|
||||||
This means that
|
|
||||||
e.g. the <CODE>s</CODE> field will in general no longer appear as the first
|
|
||||||
field, even if it does so in the GF source code. But relying on the
|
|
||||||
order of fields in a labelled record would be misplaced anyway.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
The canonical form of records is further complicated by lock fields,
|
|
||||||
i.e. dummy fields of form <CODE>lock_C = <></CODE>, which are added to grammar
|
|
||||||
libraries to force intensionality of linearization types. The problem
|
|
||||||
is that the absence of a lock field only generates a warning, not
|
|
||||||
an error. Therefore a GF grammar can contain objects of the same
|
|
||||||
type with and without a lock field. This problem was solved in GFCC
|
|
||||||
generation by just removing all lock fields (defined as fields whose
|
|
||||||
type is the empty record type). This has the further advantage of
|
|
||||||
(slightly) reducing the grammar size. More importantly, it is safe
|
|
||||||
to remove lock fields, because they are never used in computation,
|
|
||||||
and because intensional types are only needed in grammars reused
|
|
||||||
as libraries, not in grammars used at runtime.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
While the order problem is rather bureaucratic in nature, run-time
|
|
||||||
variables are an interesting problem. They arise in the presence
|
|
||||||
of complex parameter values, created by argument-taking constructors
|
|
||||||
and parameter records. To give an example, consider the GF parameter
|
|
||||||
type system
|
|
||||||
</P>
|
|
||||||
<PRE>
|
|
||||||
Number = Sg | Pl ;
|
|
||||||
Person = P1 | P2 | P3 ;
|
|
||||||
Agr = Ag Number Person ;
|
|
||||||
</PRE>
|
|
||||||
<P>
|
|
||||||
The values can be translated to integers in the expected way,
|
|
||||||
</P>
|
|
||||||
<PRE>
|
|
||||||
Sg = 0, Pl = 1
|
|
||||||
P1 = 0, P2 = 1, P3 = 2
|
|
||||||
Ag Sg P1 = 0, Ag Sg P2 = 1, Ag Sg P3 = 2,
|
|
||||||
Ag Pl P1 = 3, Ag Pl P2 = 4, Ag Pl P3 = 5
|
|
||||||
</PRE>
|
|
||||||
<P>
|
|
||||||
However, an argument of <CODE>Agr</CODE> can be a run-time variable, as in
|
|
||||||
</P>
|
|
||||||
<PRE>
|
|
||||||
Ag np.n P3
|
|
||||||
</PRE>
|
|
||||||
<P>
|
|
||||||
This expression must first be translated to a case expression,
|
|
||||||
</P>
|
|
||||||
<PRE>
|
|
||||||
case np.n of {
|
|
||||||
0 => 2 ;
|
|
||||||
1 => 5
|
|
||||||
}
|
|
||||||
</PRE>
|
|
||||||
<P>
|
|
||||||
which can then be translated to the GFCC term
|
|
||||||
</P>
|
|
||||||
<PRE>
|
|
||||||
([2,5] ! ($0 ! 1))
|
|
||||||
</PRE>
|
|
||||||
<P>
|
|
||||||
assuming that the variable <CODE>np</CODE> is the first argument and that its
|
|
||||||
<CODE>Number</CODE> field is the second in the record.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
This transformation of course has to be performed recursively, since
|
|
||||||
there can be several run-time variables in a parameter value:
|
|
||||||
</P>
|
|
||||||
<PRE>
|
|
||||||
Ag np.n np.p
|
|
||||||
</PRE>
|
|
||||||
<P>
|
|
||||||
A similar transformation would be possible to deal with the double
|
|
||||||
role of parameter records discussed above. Thus the type
|
|
||||||
</P>
|
|
||||||
<PRE>
|
|
||||||
RNP = {n : Number ; p : Person}
|
|
||||||
</PRE>
|
|
||||||
<P>
|
|
||||||
could be uniformly translated into the set <CODE>{0,1,2,3,4,5}</CODE>
|
|
||||||
as <CODE>Agr</CODE> above. Selections would be simple instances of indexing.
|
|
||||||
But any projection from the record should be translated into
|
|
||||||
a case expression,
|
|
||||||
</P>
|
|
||||||
<PRE>
|
|
||||||
rnp.n ===>
|
|
||||||
case rnp of {
|
|
||||||
0 => 0 ;
|
|
||||||
1 => 0 ;
|
|
||||||
2 => 0 ;
|
|
||||||
3 => 1 ;
|
|
||||||
4 => 1 ;
|
|
||||||
5 => 1
|
|
||||||
}
|
|
||||||
</PRE>
|
|
||||||
<P>
|
|
||||||
To avoid the code bloat resulting from this, we have chosen to
|
|
||||||
deal with records by a <B>currying</B> transformation:
|
|
||||||
</P>
|
|
||||||
<PRE>
|
|
||||||
table {n : Number ; p : Person} {... ...}
|
|
||||||
===>
|
|
||||||
table Number {Sg => table Person {...} ; Pl => table Person {...}}
|
|
||||||
</PRE>
|
|
||||||
<P>
|
|
||||||
This is performed when GFCC is generated. Selections with
|
|
||||||
records have to be treated likewise,
|
|
||||||
</P>
|
|
||||||
<PRE>
|
|
||||||
t ! r ===> t ! r.n ! r.p
|
|
||||||
</PRE>
|
|
||||||
<P></P>
|
|
||||||
<H3>The representation of linearization types</H3>
|
|
||||||
<P>
|
|
||||||
Linearization types (<CODE>lincat</CODE>) are not needed when generating with
|
|
||||||
GFCC, but they have been added to enable parser generation directly from
|
|
||||||
GFCC. The linearization type definitions are shown as a part of the
|
|
||||||
concrete syntax, by using terms to represent types. Here is the table
|
|
||||||
showing how different linearization types are encoded.
|
|
||||||
</P>
|
|
||||||
<PRE>
|
|
||||||
P* = max(P) -- parameter type
|
|
||||||
{r1 : T1 ; ... ; rn : Tn}* = [T1*,...,Tn*] -- record
|
|
||||||
(P => T)* = [T* ,...,T*] -- table, size(P) cases
|
|
||||||
Str* = ()
|
|
||||||
</PRE>
|
|
||||||
<P>
|
|
||||||
For example, the linearization type <CODE>present/CatEng.NP</CODE> is
|
|
||||||
translated as follows:
|
|
||||||
</P>
|
|
||||||
<PRE>
|
|
||||||
NP = {
|
|
||||||
a : { -- 6 = 2*3 values
|
|
||||||
n : {ParamX.Number} ; -- 2 values
|
|
||||||
p : {ParamX.Person} -- 3 values
|
|
||||||
} ;
|
|
||||||
s : {ResEng.Case} => Str -- 3 values
|
|
||||||
}
|
|
||||||
|
|
||||||
__NP = [[1,2],[(),(),()]]
|
|
||||||
</PRE>
|
|
||||||
<P></P>
|
|
||||||
<H3>Running the compiler and the GFCC interpreter</H3>
|
|
||||||
<P>
|
|
||||||
GFCC generation is a part of the
|
|
||||||
<A HREF="http://www.cs.chalmers.se/Cs/Research/Language-technology/darcs/GF/doc/darcs.html">developers' version</A>
|
|
||||||
of GF since September 2006. To invoke the compiler, the flag
|
|
||||||
<CODE>-printer=gfcc</CODE> to the command
|
|
||||||
<CODE>pm = print_multi</CODE> is used. It is wise to recompile the grammar from
|
|
||||||
source, since previously compiled libraries may not obey the canonical
|
|
||||||
order of records.
|
|
||||||
Here is an example, performed in
|
|
||||||
<A HREF="../../../../../examples/bronzeage">examples/bronzeage</A>.
|
|
||||||
</P>
|
|
||||||
<PRE>
|
|
||||||
i -src -path=.:prelude:resource-1.0/* -optimize=all_subs BronzeageEng.gf
|
|
||||||
i -src -path=.:prelude:resource-1.0/* -optimize=all_subs BronzeageGer.gf
|
|
||||||
strip
|
|
||||||
pm -printer=gfcc | wf bronze.gfcc
|
|
||||||
</PRE>
|
|
||||||
<P>
|
|
||||||
There is also an experimental batch compiler, which does not use the GFC
|
|
||||||
format or the record aliases. It can be produced by
|
|
||||||
</P>
|
|
||||||
<PRE>
|
|
||||||
make gfc
|
|
||||||
</PRE>
|
|
||||||
<P>
|
|
||||||
in <CODE>GF/src</CODE>, and invoked by
|
|
||||||
</P>
|
|
||||||
<PRE>
|
|
||||||
gfc --make FILES
|
|
||||||
</PRE>
|
|
||||||
<P></P>
|
|
||||||
<H2>The reference interpreter</H2>
|
|
||||||
<P>
|
|
||||||
The reference interpreter written in Haskell consists of the following files:
|
|
||||||
</P>
|
|
||||||
<PRE>
|
|
||||||
-- source file for BNFC
|
|
||||||
GFCC.cf -- labelled BNF grammar of gfcc
|
|
||||||
|
|
||||||
-- files generated by BNFC
|
|
||||||
AbsGFCC.hs -- abstract syntax datatypes
|
|
||||||
ErrM.hs -- error monad used internally
|
|
||||||
LexGFCC.hs -- lexer of gfcc files
|
|
||||||
ParGFCC.hs -- parser of gfcc files and syntax trees
|
|
||||||
PrintGFCC.hs -- printer of gfcc files and syntax trees
|
|
||||||
|
|
||||||
-- hand-written files
|
|
||||||
DataGFCC.hs -- grammar datatype, post-parser grammar creation
|
|
||||||
Linearize.hs -- linearization and evaluation
|
|
||||||
Macros.hs -- utilities abstracting away from GFCC datatypes
|
|
||||||
Generate.hs -- random and exhaustive generation, generate-and-test parsing
|
|
||||||
API.hs -- functionalities accessible in embedded GF applications
|
|
||||||
Generate.hs -- random and exhaustive generation
|
|
||||||
Shell.hs -- main function - a simple command interpreter
|
|
||||||
</PRE>
|
|
||||||
<P>
|
|
||||||
It is included in the
|
|
||||||
<A HREF="http://www.cs.chalmers.se/Cs/Research/Language-technology/darcs/GF/doc/darcs.html">developers' version</A>
|
|
||||||
of GF, in the subdirectories <A HREF="../"><CODE>GF/src/GF/GFCC</CODE></A> and
|
|
||||||
<A HREF="../../Devel"><CODE>GF/src/GF/Devel</CODE></A>.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
As of September 2007, default parsing in main GF uses GFCC (implemented by Krasimir
|
|
||||||
Angelov). The interpreter uses the relevant modules
|
|
||||||
</P>
|
|
||||||
<PRE>
|
|
||||||
GF/Conversions/SimpleToFCFG.hs -- generate parser from GFCC
|
|
||||||
GF/Parsing/FCFG.hs -- run the parser
|
|
||||||
</PRE>
|
|
||||||
<P></P>
|
|
||||||
<P>
|
|
||||||
To compile the interpreter, type
|
|
||||||
</P>
|
|
||||||
<PRE>
|
|
||||||
make gfcc
|
|
||||||
</PRE>
|
|
||||||
<P>
|
|
||||||
in <CODE>GF/src</CODE>. To run it, type
|
|
||||||
</P>
|
|
||||||
<PRE>
|
|
||||||
./gfcc <GFCC-file>
|
|
||||||
</PRE>
|
|
||||||
<P>
|
|
||||||
The available commands are
|
|
||||||
</P>
|
|
||||||
<UL>
|
|
||||||
<LI><CODE>gr <Cat> <Int></CODE>: generate a number of random trees in category,
|
|
||||||
and show their linearizations in all languages
|
|
||||||
<LI><CODE>grt <Cat> <Int></CODE>: generate a number of random trees in category,
|
|
||||||
and show the trees and their linearizations in all languages
|
|
||||||
<LI><CODE>gt <Cat> <Int></CODE>: generate a number of trees in category from smallest,
|
|
||||||
and show their linearizations in all languages
|
|
||||||
<LI><CODE>gtt <Cat> <Int></CODE>: generate a number of trees in category from smallest,
|
|
||||||
and show the trees and their linearizations in all languages
|
|
||||||
<LI><CODE>p <Lang> <Cat> <String></CODE>: parse a string into a set of trees
|
|
||||||
<LI><CODE>lin <Tree></CODE>: linearize tree in all languages, also showing full records
|
|
||||||
<LI><CODE>q</CODE>: terminate the system cleanly
|
|
||||||
</UL>
|
|
||||||
|
|
||||||
<H2>Embedded formats</H2>
|
|
||||||
<UL>
|
|
||||||
<LI>JavaScript: compiler of linearization and abstract syntax
|
|
||||||
<P></P>
|
|
||||||
<LI>Haskell: compiler of abstract syntax and interpreter with parsing,
|
|
||||||
linearization, and generation
|
|
||||||
<P></P>
|
|
||||||
<LI>C: compiler of linearization (old GFCC)
|
|
||||||
<P></P>
|
|
||||||
<LI>C++: embedded interpreter supporting linearization (old GFCC)
|
|
||||||
</UL>
|
|
||||||
|
|
||||||
<H2>Some things to do</H2>
|
|
||||||
<P>
|
|
||||||
Support for dependent types, higher-order abstract syntax, and
|
|
||||||
semantic definition in GFCC generation and interpreters.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
Replacing the entire GF shell by one based on GFCC.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
Interpreter in Java.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
Hand-written parsers for GFCC grammars to reduce code size
|
|
||||||
(and improve efficiency?) of interpreters.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
Binary format and/or file compression of GFCC output.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
Syntax editor based on GFCC.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
Rewriting of resource libraries in order to exploit the
|
|
||||||
word-suffix sharing better (depth-one tables, as in FM).
|
|
||||||
</P>
|
|
||||||
|
|
||||||
<!-- html code generated by txt2tags 2.3 (http://txt2tags.sf.net) -->
|
|
||||||
<!-- cmdline: txt2tags -thtml gfcc.txt -->
|
|
||||||
</BODY></HTML>
|
|
||||||
@@ -1,712 +0,0 @@
|
|||||||
The GFCC Grammar Format
|
|
||||||
Aarne Ranta
|
|
||||||
December 14, 2007
|
|
||||||
|
|
||||||
Author's address:
|
|
||||||
[``http://www.cs.chalmers.se/~aarne`` http://www.cs.chalmers.se/~aarne]
|
|
||||||
|
|
||||||
% to compile: txt2tags -thtml --toc gfcc.txt
|
|
||||||
|
|
||||||
History:
|
|
||||||
- 14 Dec 2007: simpler, Lisp-like concrete syntax of GFCC
|
|
||||||
- 5 Oct 2007: new, better structured GFCC with full expressive power
|
|
||||||
- 19 Oct 2006: translation of lincats, new figures on C++
|
|
||||||
- 3 Oct 2006: first version
|
|
||||||
|
|
||||||
|
|
||||||
==What is GFCC==
|
|
||||||
|
|
||||||
GFCC is a low-level format for GF grammars. Its aim is to contain the minimum
|
|
||||||
that is needed to process GF grammars at runtime. This minimality has three
|
|
||||||
advantages:
|
|
||||||
- compact grammar files and run-time objects
|
|
||||||
- time and space efficient processing
|
|
||||||
- simple definition of interpreters
|
|
||||||
|
|
||||||
|
|
||||||
Thus we also want to call GFCC the **portable grammar format**.
|
|
||||||
|
|
||||||
The idea is that all embedded GF applications use GFCC.
|
|
||||||
The GF system would be primarily used as a compiler and as a grammar
|
|
||||||
development tool.
|
|
||||||
|
|
||||||
Since GFCC is implemented in BNFC, a parser of the format is readily
|
|
||||||
available for C, C++, C#, Haskell, Java, and OCaml. Also an XML
|
|
||||||
representation can be generated in BNFC. A
|
|
||||||
[reference implementation ../]
|
|
||||||
of linearization and some other functions has been written in Haskell.
|
|
||||||
|
|
||||||
|
|
||||||
==GFCC vs. GFC==
|
|
||||||
|
|
||||||
GFCC is aimed to replace GFC as the run-time grammar format. GFC was designed
|
|
||||||
to be a run-time format, but also to
|
|
||||||
support separate compilation of grammars, i.e.
|
|
||||||
to store the results of compiling
|
|
||||||
individual GF modules. But this means that GFC has to contain extra information,
|
|
||||||
such as type annotations, which is only needed in compilation and not at
|
|
||||||
run-time. In particular, the pattern matching syntax and semantics of GFC are
|
|
||||||
complex and therefore difficult to implement in new platforms.
|
|
||||||
|
|
||||||
Actually, GFC is planned to be omitted also as the target format of
|
|
||||||
separate compilation, where plain GF (type annotated and partially evaluated)
|
|
||||||
will be used instead. GFC provides only marginal advantages as a target format
|
|
||||||
compared with GF, and it is therefore just extra weight to carry around this
|
|
||||||
format.
|
|
||||||
|
|
||||||
The main differences of GFCC compared with GFC (and GF) can be
|
|
||||||
summarized as follows:
|
|
||||||
- there are no modules, and therefore no qualified names
|
|
||||||
- a GFCC grammar is multilingual, and consists of a common abstract syntax
|
|
||||||
together with one concrete syntax per language
|
|
||||||
- records and tables are replaced by arrays
|
|
||||||
- record labels and parameter values are replaced by integers
|
|
||||||
- record projection and table selection are replaced by array indexing
|
|
||||||
- even though the format does support dependent types and higher-order abstract
|
|
||||||
syntax, there is no interpreter yet that does this
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
Here is an example of a GF grammar, consisting of three modules,
|
|
||||||
as translated to GFCC. The representations are aligned;
|
|
||||||
thus they do not completely
|
|
||||||
reflect the order of judgements in GFCC files, which have different orders of
|
|
||||||
blocks of judgements, and alphabetical sorting.
|
|
||||||
```
|
|
||||||
grammar Ex(Eng,Swe);
|
|
||||||
|
|
||||||
abstract Ex = { abstract {
|
|
||||||
cat cat
|
|
||||||
S ; NP ; VP ; NP[]; S[]; VP[];
|
|
||||||
fun fun
|
|
||||||
Pred : NP -> VP -> S ; Pred=[(($ 0! 1),(($ 1! 0)!($ 0! 0)))];
|
|
||||||
She, They : NP ; She=[0,"she"];
|
|
||||||
Sleep : VP ; They=[1,"they"];
|
|
||||||
Sleep=[["sleeps","sleep"]];
|
|
||||||
} } ;
|
|
||||||
|
|
||||||
concrete Eng of Ex = { concrete Eng {
|
|
||||||
lincat lincat
|
|
||||||
S = {s : Str} ; S=[()];
|
|
||||||
NP = {s : Str ; n : Num} ; NP=[1,()];
|
|
||||||
VP = {s : Num => Str} ; VP=[[(),()]];
|
|
||||||
param
|
|
||||||
Num = Sg | Pl ;
|
|
||||||
lin lin
|
|
||||||
Pred np vp = { Pred=[(($ 0! 1),(($ 1! 0)!($ 0! 0)))];
|
|
||||||
s = np.s ++ vp.s ! np.n} ;
|
|
||||||
She = {s = "she" ; n = Sg} ; She=[0,"she"];
|
|
||||||
They = {s = "they" ; n = Pl} ; They = [1, "they"];
|
|
||||||
Sleep = {s = table { Sleep=[["sleeps","sleep"]];
|
|
||||||
Sg => "sleeps" ;
|
|
||||||
Pl => "sleep"
|
|
||||||
}
|
|
||||||
} ;
|
|
||||||
} } ;
|
|
||||||
|
|
||||||
concrete Swe of Ex = { concrete Swe {
|
|
||||||
lincat lincat
|
|
||||||
S = {s : Str} ; S=[()];
|
|
||||||
NP = {s : Str} ; NP=[()];
|
|
||||||
VP = {s : Str} ; VP=[()];
|
|
||||||
param
|
|
||||||
Num = Sg | Pl ;
|
|
||||||
lin lin
|
|
||||||
Pred np vp = { Pred = [(($0!0),($1!0))];
|
|
||||||
s = np.s ++ vp.s} ;
|
|
||||||
She = {s = "hon"} ; She = ["hon"];
|
|
||||||
They = {s = "de"} ; They = ["de"];
|
|
||||||
Sleep = {s = "sover"} ; Sleep = ["sover"];
|
|
||||||
} } ;
|
|
||||||
```
|
|
||||||
|
|
||||||
==The syntax of GFCC files==
|
|
||||||
|
|
||||||
The complete BNFC grammar, from which
|
|
||||||
the rules in this section are taken, is in the file
|
|
||||||
[``GF/GFCC/GFCC.cf`` ../GFCC.cf].
|
|
||||||
|
|
||||||
|
|
||||||
===Top level===
|
|
||||||
|
|
||||||
A grammar has a header telling the name of the abstract syntax
|
|
||||||
(often specifying an application domain), and the names of
|
|
||||||
the concrete languages. The abstract syntax and the concrete
|
|
||||||
syntaxes themselves follow.
|
|
||||||
```
|
|
||||||
Grm. Grammar ::=
|
|
||||||
"grammar" CId "(" [CId] ")" ";"
|
|
||||||
Abstract ";"
|
|
||||||
[Concrete] ;
|
|
||||||
|
|
||||||
Abs. Abstract ::=
|
|
||||||
"abstract" "{"
|
|
||||||
"flags" [Flag]
|
|
||||||
"fun" [FunDef]
|
|
||||||
"cat" [CatDef]
|
|
||||||
"}" ;
|
|
||||||
|
|
||||||
Cnc. Concrete ::=
|
|
||||||
"concrete" CId "{"
|
|
||||||
"flags" [Flag]
|
|
||||||
"lin" [LinDef]
|
|
||||||
"oper" [LinDef]
|
|
||||||
"lincat" [LinDef]
|
|
||||||
"lindef" [LinDef]
|
|
||||||
"printname" [LinDef]
|
|
||||||
"}" ;
|
|
||||||
```
|
|
||||||
This syntax organizes each module into a sequence of **fields**, such
|
|
||||||
as flags, linearizations, operations, linearization types, etc.
|
|
||||||
It is envisaged that particular applications can ignore some
|
|
||||||
of the fields, typically so that earlier fields are more
|
|
||||||
important than later ones.
|
|
||||||
|
|
||||||
The judgement forms have the following syntax.
|
|
||||||
```
|
|
||||||
Flg. Flag ::= CId "=" String ;
|
|
||||||
Cat. CatDef ::= CId "[" [Hypo] "]" ;
|
|
||||||
Fun. FunDef ::= CId ":" Type "=" Exp ;
|
|
||||||
Lin. LinDef ::= CId "=" Term ;
|
|
||||||
```
|
|
||||||
For the run-time system, the reference implementation in Haskell
|
|
||||||
uses a structure that gives efficient look-up:
|
|
||||||
```
|
|
||||||
data GFCC = GFCC {
|
|
||||||
absname :: CId ,
|
|
||||||
cncnames :: [CId] ,
|
|
||||||
abstract :: Abstr ,
|
|
||||||
concretes :: Map CId Concr
|
|
||||||
}
|
|
||||||
|
|
||||||
data Abstr = Abstr {
|
|
||||||
aflags :: Map CId String, -- value of a flag
|
|
||||||
funs :: Map CId (Type,Exp), -- type and def of a fun
|
|
||||||
cats :: Map CId [Hypo], -- context of a cat
|
|
||||||
catfuns :: Map CId [CId] -- funs yielding a cat (redundant, for fast lookup)
|
|
||||||
}
|
|
||||||
|
|
||||||
data Concr = Concr {
|
|
||||||
flags :: Map CId String, -- value of a flag
|
|
||||||
lins :: Map CId Term, -- lin of a fun
|
|
||||||
opers :: Map CId Term, -- oper generated by subex elim
|
|
||||||
lincats :: Map CId Term, -- lin type of a cat
|
|
||||||
lindefs :: Map CId Term, -- lin default of a cat
|
|
||||||
printnames :: Map CId Term -- printname of a cat or a fun
|
|
||||||
}
|
|
||||||
```
|
|
||||||
These definitions are from [``GF/GFCC/DataGFCC.hs`` ../DataGFCC.hs].
|
|
||||||
|
|
||||||
Identifiers (``CId``) are like ``Ident`` in GF, except that
|
|
||||||
the compiler produces constants prefixed with ``_`` in
|
|
||||||
the common subterm elimination optimization.
|
|
||||||
```
|
|
||||||
token CId (('_' | letter) (letter | digit | '\'' | '_')*) ;
|
|
||||||
```
|
|
||||||
|
|
||||||
|
|
||||||
===Abstract syntax===
|
|
||||||
|
|
||||||
Types are first-order function types built from argument type
|
|
||||||
contexts and value types, which are in turn built from
|
|
||||||
category symbols. Syntax trees (``Exp``) are
|
|
||||||
rose trees with nodes consisting of a head (``Atom``) and
|
|
||||||
bound variables (``CId``).
|
|
||||||
```
|
|
||||||
DTyp. Type ::= "[" [Hypo] "]" CId [Exp] ;
|
|
||||||
DTr. Exp ::= "[" "(" [CId] ")" Atom [Exp] "]" ;
|
|
||||||
Hyp. Hypo ::= CId ":" Type ;
|
|
||||||
```
|
|
||||||
The head Atom is either a function
|
|
||||||
constant, a bound variable, or a metavariable, or a string, integer, or float
|
|
||||||
literal.
|
|
||||||
```
|
|
||||||
AC. Atom ::= CId ;
|
|
||||||
AS. Atom ::= String ;
|
|
||||||
AI. Atom ::= Integer ;
|
|
||||||
AF. Atom ::= Double ;
|
|
||||||
AM. Atom ::= "?" Integer ;
|
|
||||||
```
|
|
||||||
The context-free types and trees of the "old GFCC" are special
|
|
||||||
cases, which can be defined as follows:
|
|
||||||
```
|
|
||||||
Typ. Type ::= [CId] "->" CId
|
|
||||||
Typ args val = DTyp [Hyp (CId "_") arg | arg <- args] val
|
|
||||||
|
|
||||||
Tr. Exp ::= "(" CId [Exp] ")"
|
|
||||||
Tr fun exps = DTr [] fun exps
|
|
||||||
```
|
|
||||||
To store semantic (``def``) definitions by cases, the following expression
|
|
||||||
form is provided, but it is only meaningful in the last field of a function
|
|
||||||
declaration in an abstract syntax:
|
|
||||||
```
|
|
||||||
EEq. Exp ::= "{" [Equation] "}" ;
|
|
||||||
Equ. Equation ::= [Exp] "->" Exp ;
|
|
||||||
```
|
|
||||||
Notice that expressions are used to encode patterns. Primitive notions
|
|
||||||
(the default semantics in GF) are encoded as empty sets of equations
|
|
||||||
(``[]``). For a constructor (canonical form) of a category ``C``, we
|
|
||||||
aim to use the encoding as the application ``(_constr C)``.
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
===Concrete syntax===
|
|
||||||
|
|
||||||
Linearization terms (``Term``) are built as follows.
|
|
||||||
Constructor names are shown to make the later code
|
|
||||||
examples readable.
|
|
||||||
```
|
|
||||||
R. Term ::= "[" [Term] "]" ; -- array (record/table)
|
|
||||||
P. Term ::= "(" Term "!" Term ")" ; -- access to field (projection/selection)
|
|
||||||
S. Term ::= "(" [Term] ")" ; -- concatenated sequence
|
|
||||||
K. Term ::= Tokn ; -- token
|
|
||||||
V. Term ::= "$" Integer ; -- argument (subtree)
|
|
||||||
C. Term ::= Integer ; -- array index (label/parameter value)
|
|
||||||
FV. Term ::= "[|" [Term] "|]" ; -- free variation
|
|
||||||
TM. Term ::= "?" ; -- linearization of metavariable
|
|
||||||
```
|
|
||||||
Tokens are strings or (maybe obsolescent) prefix-dependent
|
|
||||||
variant lists.
|
|
||||||
```
|
|
||||||
KS. Tokn ::= String ;
|
|
||||||
KP. Tokn ::= "[" "pre" [String] "[" [Variant] "]" "]" ;
|
|
||||||
Var. Variant ::= [String] "/" [String] ;
|
|
||||||
```
|
|
||||||
Two special forms of terms are introduced by the compiler
|
|
||||||
as optimizations. They can in principle be eliminated, but
|
|
||||||
their presence makes grammars much more compact. Their semantics
|
|
||||||
will be explained in a later section.
|
|
||||||
```
|
|
||||||
F. Term ::= CId ; -- global constant
|
|
||||||
W. Term ::= "(" String "+" Term ")" ; -- prefix + suffix table
|
|
||||||
```
|
|
||||||
There is also a deprecated form of "record parameter alias",
|
|
||||||
```
|
|
||||||
RP. Term ::= "(" Term "@" Term ")"; -- DEPRECATED
|
|
||||||
```
|
|
||||||
which will be removed when the migration to new GFCC is complete.
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
==The semantics of concrete syntax terms==
|
|
||||||
|
|
||||||
The code in this section is from [``GF/GFCC/Linearize.hs`` ../Linearize.hs].
|
|
||||||
|
|
||||||
|
|
||||||
===Linearization and realization===
|
|
||||||
|
|
||||||
The linearization algorithm is essentially the same as in
|
|
||||||
GFC: a tree is linearized by evaluating its linearization term
|
|
||||||
in the environment of the linearizations of the subtrees.
|
|
||||||
Literal atoms are linearized in the obvious way.
|
|
||||||
The function also needs to know the language (i.e. concrete syntax)
|
|
||||||
in which linearization is performed.
|
|
||||||
```
|
|
||||||
linExp :: GFCC -> CId -> Exp -> Term
|
|
||||||
linExp gfcc lang tree@(DTr _ at trees) = case at of
|
|
||||||
AC fun -> comp (Prelude.map lin trees) $ look fun
|
|
||||||
AS s -> R [kks (show s)] -- quoted
|
|
||||||
AI i -> R [kks (show i)]
|
|
||||||
AF d -> R [kks (show d)]
|
|
||||||
AM _ -> TM
|
|
||||||
where
|
|
||||||
lin = linExp gfcc lang
|
|
||||||
comp = compute gfcc lang
|
|
||||||
look = lookLin gfcc lang
|
|
||||||
```
|
|
||||||
TODO: bindings must be supported.
|
|
||||||
|
|
||||||
The result of linearization is usually a record, which is realized as
|
|
||||||
a string using the following algorithm.
|
|
||||||
```
|
|
||||||
realize :: Term -> String
|
|
||||||
realize trm = case trm of
|
|
||||||
R (t:_) -> realize t
|
|
||||||
S ss -> unwords $ Prelude.map realize ss
|
|
||||||
K (KS s) -> s
|
|
||||||
K (KP s _) -> unwords s ---- prefix choice TODO
|
|
||||||
W s t -> s ++ realize t
|
|
||||||
FV (t:_) -> realize t
|
|
||||||
TM -> "?"
|
|
||||||
```
|
|
||||||
Notice that realization always picks the first field of a record.
|
|
||||||
If a linearization type has more than one field, the first field
|
|
||||||
does not necessarily contain the desired string.
|
|
||||||
Also notice that the order of record fields in GFCC is not necessarily
|
|
||||||
the same as in GF source.
|
|
||||||
|
|
||||||
|
|
||||||
===Term evaluation===
|
|
||||||
|
|
||||||
Evaluation follows call-by-value order, with two environments
|
|
||||||
needed:
|
|
||||||
- the grammar (a concrete syntax) to give the global constants
|
|
||||||
- an array of terms to give the subtree linearizations
|
|
||||||
|
|
||||||
|
|
||||||
The code is presented in one-level pattern matching, to
|
|
||||||
enable reimplementations in languages that do not permit
|
|
||||||
deep patterns (such as Java and C++).
|
|
||||||
```
|
|
||||||
compute :: GFCC -> CId -> [Term] -> Term -> Term
|
|
||||||
compute gfcc lang args = comp where
|
|
||||||
comp trm = case trm of
|
|
||||||
P r p -> proj (comp r) (comp p)
|
|
||||||
W s t -> W s (comp t)
|
|
||||||
R ts -> R $ Prelude.map comp ts
|
|
||||||
V i -> idx args (fromInteger i) -- already computed
|
|
||||||
F c -> comp $ look c -- not computed (if contains V)
|
|
||||||
FV ts -> FV $ Prelude.map comp ts
|
|
||||||
S ts -> S $ Prelude.filter (/= S []) $ Prelude.map comp ts
|
|
||||||
_ -> trm
|
|
||||||
|
|
||||||
look = lookOper gfcc lang
|
|
||||||
|
|
||||||
idx xs i = xs !! i
|
|
||||||
|
|
||||||
proj r p = case (r,p) of
|
|
||||||
(_, FV ts) -> FV $ Prelude.map (proj r) ts
|
|
||||||
(FV ts, _ ) -> FV $ Prelude.map (\t -> proj t p) ts
|
|
||||||
(W s t, _) -> kks (s ++ getString (proj t p))
|
|
||||||
_ -> comp $ getField r (getIndex p)
|
|
||||||
|
|
||||||
getString t = case t of
|
|
||||||
K (KS s) -> s
|
|
||||||
_ -> trace ("ERROR in grammar compiler: string from "++ show t) "ERR"
|
|
||||||
|
|
||||||
getIndex t = case t of
|
|
||||||
C i -> fromInteger i
|
|
||||||
RP p _ -> getIndex p
|
|
||||||
TM -> 0 -- default value for parameter
|
|
||||||
_ -> trace ("ERROR in grammar compiler: index from " ++ show t) 0
|
|
||||||
|
|
||||||
getField t i = case t of
|
|
||||||
R rs -> idx rs i
|
|
||||||
RP _ r -> getField r i
|
|
||||||
TM -> TM
|
|
||||||
_ -> trace ("ERROR in grammar compiler: field from " ++ show t) t
|
|
||||||
```
|
|
||||||
|
|
||||||
===The special term constructors===
|
|
||||||
|
|
||||||
The three forms introduced by the compiler may need a special
|
|
||||||
explanation.
|
|
||||||
|
|
||||||
**Global constants**
|
|
||||||
```
|
|
||||||
Term ::= CId ;
|
|
||||||
```
|
|
||||||
are shorthands for complex terms. They are produced by the
|
|
||||||
compiler by (iterated) **common subexpression elimination**.
|
|
||||||
They are often more powerful than hand-devised code sharing in the source
|
|
||||||
code. They could be computed off-line by replacing each identifier by
|
|
||||||
its definition.
|
|
||||||
|
|
||||||
**Prefix-suffix tables**
|
|
||||||
```
|
|
||||||
Term ::= "(" String "+" Term ")" ;
|
|
||||||
```
|
|
||||||
represent tables of word forms divided into the longest common prefix
|
|
||||||
and its array of suffixes. In the example grammar above, we have
|
|
||||||
```
|
|
||||||
Sleep = [("sleep" + ["s",""])]
|
|
||||||
```
|
|
||||||
which in fact is equal to the array of full forms
|
|
||||||
```
|
|
||||||
["sleeps", "sleep"]
|
|
||||||
```
|
|
||||||
The power of this construction comes from the fact that suffix sets
|
|
||||||
tend to be repeated in a language, and can therefore be collected
|
|
||||||
by common subexpression elimination. It is this technique that
|
|
||||||
explains the used syntax rather than the more accurate
|
|
||||||
```
|
|
||||||
"(" String "+" [String] ")"
|
|
||||||
```
|
|
||||||
since we want the suffix part to be a ``Term`` for the optimization to
|
|
||||||
take effect.
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
==Compiling to GFCC==
|
|
||||||
|
|
||||||
Compilation to GFCC is performed by the GF grammar compiler, and
|
|
||||||
GFCC interpreters need not know what it does. For grammar writers,
|
|
||||||
however, it might be interesting to know what happens to the grammars
|
|
||||||
in the process.
|
|
||||||
|
|
||||||
The compilation phases are the following
|
|
||||||
+ type check and partially evaluate GF source
|
|
||||||
+ create a symbol table mapping the GF parameter and record types to
|
|
||||||
fixed-size arrays, and parameter values and record labels to integers
|
|
||||||
+ traverse the linearization rules replacing parameters and labels by integers
|
|
||||||
+ reorganize the created GF grammar so that it has just one abstract syntax
|
|
||||||
and one concrete syntax per language
|
|
||||||
+ TODO: apply UTF8 encoding to the grammar, if not yet applied (this is told by the
|
|
||||||
``coding`` flag)
|
|
||||||
+ translate the GF grammar object to a GFCC grammar object, using a simple
|
|
||||||
compositional mapping
|
|
||||||
+ perform the word-suffix optimization on GFCC linearization terms
|
|
||||||
+ perform subexpression elimination on each concrete syntax module
|
|
||||||
+ print out the GFCC code
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
===Problems in GFCC compilation===
|
|
||||||
|
|
||||||
Two major problems had to be solved in compiling GF to GFCC:
|
|
||||||
- consistent order of tables and records, to permit the array translation
|
|
||||||
- run-time variables in complex parameter values.
|
|
||||||
|
|
||||||
|
|
||||||
The current implementation is still experimental and may fail
|
|
||||||
to generate correct code. Any errors remaining are likely to be
|
|
||||||
related to the two problems just mentioned.
|
|
||||||
|
|
||||||
The order problem is solved in slightly different ways for tables and records.
|
|
||||||
In both cases, **eta expansion** is used to establish a
|
|
||||||
canonical order. Tables are ordered by applying the preorder induced
|
|
||||||
by ``param`` definitions. Records are ordered by sorting them by labels.
|
|
||||||
This means that
|
|
||||||
e.g. the ``s`` field will in general no longer appear as the first
|
|
||||||
field, even if it does so in the GF source code. But relying on the
|
|
||||||
order of fields in a labelled record would be misplaced anyway.
|
|
||||||
|
|
||||||
The canonical form of records is further complicated by lock fields,
|
|
||||||
i.e. dummy fields of form ``lock_C = <>``, which are added to grammar
|
|
||||||
libraries to force intensionality of linearization types. The problem
|
|
||||||
is that the absence of a lock field only generates a warning, not
|
|
||||||
an error. Therefore a GF grammar can contain objects of the same
|
|
||||||
type with and without a lock field. This problem was solved in GFCC
|
|
||||||
generation by just removing all lock fields (defined as fields whose
|
|
||||||
type is the empty record type). This has the further advantage of
|
|
||||||
(slightly) reducing the grammar size. More importantly, it is safe
|
|
||||||
to remove lock fields, because they are never used in computation,
|
|
||||||
and because intensional types are only needed in grammars reused
|
|
||||||
as libraries, not in grammars used at runtime.
|
|
||||||
|
|
||||||
While the order problem is rather bureaucratic in nature, run-time
|
|
||||||
variables are an interesting problem. They arise in the presence
|
|
||||||
of complex parameter values, created by argument-taking constructors
|
|
||||||
and parameter records. To give an example, consider the GF parameter
|
|
||||||
type system
|
|
||||||
```
|
|
||||||
Number = Sg | Pl ;
|
|
||||||
Person = P1 | P2 | P3 ;
|
|
||||||
Agr = Ag Number Person ;
|
|
||||||
```
|
|
||||||
The values can be translated to integers in the expected way,
|
|
||||||
```
|
|
||||||
Sg = 0, Pl = 1
|
|
||||||
P1 = 0, P2 = 1, P3 = 2
|
|
||||||
Ag Sg P1 = 0, Ag Sg P2 = 1, Ag Sg P3 = 2,
|
|
||||||
Ag Pl P1 = 3, Ag Pl P2 = 4, Ag Pl P3 = 5
|
|
||||||
```
|
|
||||||
However, an argument of ``Agr`` can be a run-time variable, as in
|
|
||||||
```
|
|
||||||
Ag np.n P3
|
|
||||||
```
|
|
||||||
This expression must first be translated to a case expression,
|
|
||||||
```
|
|
||||||
case np.n of {
|
|
||||||
0 => 2 ;
|
|
||||||
1 => 5
|
|
||||||
}
|
|
||||||
```
|
|
||||||
which can then be translated to the GFCC term
|
|
||||||
```
|
|
||||||
([2,5] ! ($0 ! $1))
|
|
||||||
```
|
|
||||||
assuming that the variable ``np`` is the first argument and that its
|
|
||||||
``Number`` field is the second in the record.
|
|
||||||
|
|
||||||
This transformation of course has to be performed recursively, since
|
|
||||||
there can be several run-time variables in a parameter value:
|
|
||||||
```
|
|
||||||
Ag np.n np.p
|
|
||||||
```
|
|
||||||
A similar transformation would be possible to deal with the double
|
|
||||||
role of parameter records discussed above. Thus the type
|
|
||||||
```
|
|
||||||
RNP = {n : Number ; p : Person}
|
|
||||||
```
|
|
||||||
could be uniformly translated into the set ``{0,1,2,3,4,5}``
|
|
||||||
as ``Agr`` above. Selections would be simple instances of indexing.
|
|
||||||
But any projection from the record should be translated into
|
|
||||||
a case expression,
|
|
||||||
```
|
|
||||||
rnp.n ===>
|
|
||||||
case rnp of {
|
|
||||||
0 => 0 ;
|
|
||||||
1 => 0 ;
|
|
||||||
2 => 0 ;
|
|
||||||
3 => 1 ;
|
|
||||||
4 => 1 ;
|
|
||||||
5 => 1
|
|
||||||
}
|
|
||||||
```
|
|
||||||
To avoid the code bloat resulting from this, we have chosen to
|
|
||||||
deal with records by a **currying** transformation:
|
|
||||||
```
|
|
||||||
table {n : Number ; p : Person} {... ...}
|
|
||||||
===>
|
|
||||||
table Number {Sg => table Person {...} ; Pl => table Person {...}}
|
|
||||||
```
|
|
||||||
This is performed when GFCC is generated. Selections with
|
|
||||||
records have to be treated likewise,
|
|
||||||
```
|
|
||||||
t ! r ===> t ! r.n ! r.p
|
|
||||||
```
|
|
||||||
|
|
||||||
|
|
||||||
===The representation of linearization types===
|
|
||||||
|
|
||||||
Linearization types (``lincat``) are not needed when generating with
|
|
||||||
GFCC, but they have been added to enable parser generation directly from
|
|
||||||
GFCC. The linearization type definitions are shown as a part of the
|
|
||||||
concrete syntax, by using terms to represent types. Here is the table
|
|
||||||
showing how different linearization types are encoded.
|
|
||||||
```
|
|
||||||
P* = max(P) -- parameter type
|
|
||||||
{r1 : T1 ; ... ; rn : Tn}* = [T1*,...,Tn*] -- record
|
|
||||||
(P => T)* = [T* ,...,T*] -- table, size(P) cases
|
|
||||||
Str* = ()
|
|
||||||
```
|
|
||||||
For example, the linearization type ``present/CatEng.NP`` is
|
|
||||||
translated as follows:
|
|
||||||
```
|
|
||||||
NP = {
|
|
||||||
a : { -- 6 = 2*3 values
|
|
||||||
n : {ParamX.Number} ; -- 2 values
|
|
||||||
p : {ParamX.Person} -- 3 values
|
|
||||||
} ;
|
|
||||||
s : {ResEng.Case} => Str -- 3 values
|
|
||||||
}
|
|
||||||
|
|
||||||
__NP = [[1,2],[(),(),()]]
|
|
||||||
```
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
===Running the compiler and the GFCC interpreter===
|
|
||||||
|
|
||||||
GFCC generation is a part of the
|
|
||||||
[developers' version http://www.cs.chalmers.se/Cs/Research/Language-technology/darcs/GF/doc/darcs.html]
|
|
||||||
of GF since September 2006. To invoke the compiler, the flag
|
|
||||||
``-printer=gfcc`` to the command
|
|
||||||
``pm = print_multi`` is used. It is wise to recompile the grammar from
|
|
||||||
source, since previously compiled libraries may not obey the canonical
|
|
||||||
order of records.
|
|
||||||
Here is an example, performed in
|
|
||||||
[examples/bronzeage ../../../../../examples/bronzeage].
|
|
||||||
```
|
|
||||||
i -src -path=.:prelude:resource-1.0/* -optimize=all_subs BronzeageEng.gf
|
|
||||||
i -src -path=.:prelude:resource-1.0/* -optimize=all_subs BronzeageGer.gf
|
|
||||||
strip
|
|
||||||
pm -printer=gfcc | wf bronze.gfcc
|
|
||||||
```
|
|
||||||
There is also an experimental batch compiler, which does not use the GFC
|
|
||||||
format or the record aliases. It can be produced by
|
|
||||||
```
|
|
||||||
make gfc
|
|
||||||
```
|
|
||||||
in ``GF/src``, and invoked by
|
|
||||||
```
|
|
||||||
gfc --make FILES
|
|
||||||
```
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
==The reference interpreter==
|
|
||||||
|
|
||||||
The reference interpreter written in Haskell consists of the following files:
|
|
||||||
```
|
|
||||||
-- source file for BNFC
|
|
||||||
GFCC.cf -- labelled BNF grammar of gfcc
|
|
||||||
|
|
||||||
-- files generated by BNFC
|
|
||||||
AbsGFCC.hs -- abstract syntax datatypes
|
|
||||||
ErrM.hs -- error monad used internally
|
|
||||||
LexGFCC.hs -- lexer of gfcc files
|
|
||||||
ParGFCC.hs -- parser of gfcc files and syntax trees
|
|
||||||
PrintGFCC.hs -- printer of gfcc files and syntax trees
|
|
||||||
|
|
||||||
-- hand-written files
|
|
||||||
DataGFCC.hs -- grammar datatype, post-parser grammar creation
|
|
||||||
Linearize.hs -- linearization and evaluation
|
|
||||||
Macros.hs -- utilities abstracting away from GFCC datatypes
|
|
||||||
Generate.hs -- random and exhaustive generation, generate-and-test parsing
|
|
||||||
API.hs -- functionalities accessible in embedded GF applications
|
|
||||||
Generate.hs -- random and exhaustive generation
|
|
||||||
Shell.hs -- main function - a simple command interpreter
|
|
||||||
```
|
|
||||||
It is included in the
|
|
||||||
[developers' version http://www.cs.chalmers.se/Cs/Research/Language-technology/darcs/GF/doc/darcs.html]
|
|
||||||
of GF, in the subdirectories [``GF/src/GF/GFCC`` ../] and
|
|
||||||
[``GF/src/GF/Devel`` ../../Devel].
|
|
||||||
|
|
||||||
As of September 2007, default parsing in main GF uses GFCC (implemented by Krasimir
|
|
||||||
Angelov). The interpreter uses the relevant modules
|
|
||||||
```
|
|
||||||
GF/Conversions/SimpleToFCFG.hs -- generate parser from GFCC
|
|
||||||
GF/Parsing/FCFG.hs -- run the parser
|
|
||||||
```
|
|
||||||
|
|
||||||
|
|
||||||
To compile the interpreter, type
|
|
||||||
```
|
|
||||||
make gfcc
|
|
||||||
```
|
|
||||||
in ``GF/src``. To run it, type
|
|
||||||
```
|
|
||||||
./gfcc <GFCC-file>
|
|
||||||
```
|
|
||||||
The available commands are
|
|
||||||
- ``gr <Cat> <Int>``: generate a number of random trees in category,
|
|
||||||
and show their linearizations in all languages
|
|
||||||
- ``grt <Cat> <Int>``: generate a number of random trees in category,
|
|
||||||
and show the trees and their linearizations in all languages
|
|
||||||
- ``gt <Cat> <Int>``: generate a number of trees in category from smallest,
|
|
||||||
and show their linearizations in all languages
|
|
||||||
- ``gtt <Cat> <Int>``: generate a number of trees in category from smallest,
|
|
||||||
and show the trees and their linearizations in all languages
|
|
||||||
- ``p <Lang> <Cat> <String>``: parse a string into a set of trees
|
|
||||||
- ``lin <Tree>``: linearize tree in all languages, also showing full records
|
|
||||||
- ``q``: terminate the system cleanly
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
==Embedded formats==
|
|
||||||
|
|
||||||
- JavaScript: compiler of linearization and abstract syntax
|
|
||||||
|
|
||||||
- Haskell: compiler of abstract syntax and interpreter with parsing,
|
|
||||||
linearization, and generation
|
|
||||||
|
|
||||||
- C: compiler of linearization (old GFCC)
|
|
||||||
|
|
||||||
- C++: embedded interpreter supporting linearization (old GFCC)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
==Some things to do==
|
|
||||||
|
|
||||||
Support for dependent types, higher-order abstract syntax, and
|
|
||||||
semantic definition in GFCC generation and interpreters.
|
|
||||||
|
|
||||||
Replacing the entire GF shell by one based on GFCC.
|
|
||||||
|
|
||||||
Interpreter in Java.
|
|
||||||
|
|
||||||
Hand-written parsers for GFCC grammars to reduce code size
|
|
||||||
(and improve efficiency?) of interpreters.
|
|
||||||
|
|
||||||
Binary format and/or file compression of GFCC output.
|
|
||||||
|
|
||||||
Syntax editor based on GFCC.
|
|
||||||
|
|
||||||
Rewriting of resource libraries in order to exploit the
|
|
||||||
word-suffix sharing better (depth-one tables, as in FM).
|
|
||||||
|
|
||||||
@@ -1,50 +0,0 @@
|
|||||||
Grm. Grammar ::= Header ";" Abstract ";" [Concrete] ;
|
|
||||||
Hdr. Header ::= "grammar" CId "(" [CId] ")" ;
|
|
||||||
Abs. Abstract ::= "abstract" "{" [AbsDef] "}" ;
|
|
||||||
Cnc. Concrete ::= "concrete" CId "{" [CncDef] "}" ;
|
|
||||||
|
|
||||||
Fun. AbsDef ::= CId ":" Type "=" Exp ;
|
|
||||||
--AFl. AbsDef ::= "%" CId "=" String ; -- flag
|
|
||||||
Lin. CncDef ::= CId "=" Term ;
|
|
||||||
--CFl. CncDef ::= "%" CId "=" String ; -- flag
|
|
||||||
|
|
||||||
Typ. Type ::= [CId] "->" CId ;
|
|
||||||
Tr. Exp ::= "(" Atom [Exp] ")" ;
|
|
||||||
AC. Atom ::= CId ;
|
|
||||||
AS. Atom ::= String ;
|
|
||||||
AI. Atom ::= Integer ;
|
|
||||||
AF. Atom ::= Double ;
|
|
||||||
AM. Atom ::= "?" ;
|
|
||||||
trA. Exp ::= Atom ;
|
|
||||||
define trA a = Tr a [] ;
|
|
||||||
|
|
||||||
R. Term ::= "[" [Term] "]" ; -- record/table
|
|
||||||
P. Term ::= "(" Term "!" Term ")" ; -- projection/selection
|
|
||||||
S. Term ::= "(" [Term] ")" ; -- sequence with ++
|
|
||||||
K. Term ::= Tokn ; -- token
|
|
||||||
V. Term ::= "$" Integer ; -- argument
|
|
||||||
C. Term ::= Integer ; -- parameter value/label
|
|
||||||
F. Term ::= CId ; -- global constant
|
|
||||||
FV. Term ::= "[|" [Term] "|]" ; -- free variation
|
|
||||||
W. Term ::= "(" String "+" Term ")" ; -- prefix + suffix table
|
|
||||||
RP. Term ::= "(" Term "@" Term ")"; -- record parameter alias
|
|
||||||
TM. Term ::= "?" ; -- lin of metavariable
|
|
||||||
|
|
||||||
L. Term ::= "(" CId "->" Term ")" ; -- lambda abstracted table
|
|
||||||
BV. Term ::= "#" CId ; -- lambda-bound variable
|
|
||||||
|
|
||||||
KS. Tokn ::= String ;
|
|
||||||
KP. Tokn ::= "[" "pre" [String] "[" [Variant] "]" "]" ;
|
|
||||||
Var. Variant ::= [String] "/" [String] ;
|
|
||||||
|
|
||||||
|
|
||||||
terminator Concrete ";" ;
|
|
||||||
terminator AbsDef ";" ;
|
|
||||||
terminator CncDef ";" ;
|
|
||||||
separator CId "," ;
|
|
||||||
separator Term "," ;
|
|
||||||
terminator Exp "" ;
|
|
||||||
terminator String "" ;
|
|
||||||
separator Variant "," ;
|
|
||||||
|
|
||||||
token CId (('_' | letter) (letter | digit | '\'' | '_')*) ;
|
|
||||||
@@ -1,656 +0,0 @@
|
|||||||
The GFCC Grammar Format
|
|
||||||
Aarne Ranta
|
|
||||||
October 19, 2006
|
|
||||||
|
|
||||||
Author's address:
|
|
||||||
[``http://www.cs.chalmers.se/~aarne`` http://www.cs.chalmers.se/~aarne]
|
|
||||||
|
|
||||||
% to compile: txt2tags -thtml --toc gfcc.txt
|
|
||||||
|
|
||||||
History:
|
|
||||||
- 19 Oct: translation of lincats, new figures on C++
|
|
||||||
- 3 Oct 2006: first version
|
|
||||||
|
|
||||||
|
|
||||||
==What is GFCC==
|
|
||||||
|
|
||||||
GFCC is a low-level format for GF grammars. Its aim is to contain the minimum
|
|
||||||
that is needed to process GF grammars at runtime. This minimality has three
|
|
||||||
advantages:
|
|
||||||
- compact grammar files and run-time objects
|
|
||||||
- time and space efficient processing
|
|
||||||
- simple definition of interpreters
|
|
||||||
|
|
||||||
|
|
||||||
The idea is that all embedded GF applications are compiled to GFCC.
|
|
||||||
The GF system would be primarily used as a compiler and as a grammar
|
|
||||||
development tool.
|
|
||||||
|
|
||||||
Since GFCC is implemented in BNFC, a parser of the format is readily
|
|
||||||
available for C, C++, Haskell, Java, and OCaml. Also an XML
|
|
||||||
representation is generated in BNFC. A
|
|
||||||
[reference implementation ../]
|
|
||||||
of linearization and some other functions has been written in Haskell.
|
|
||||||
|
|
||||||
|
|
||||||
==GFCC vs. GFC==
|
|
||||||
|
|
||||||
GFCC is intended to replace GFC as the run-time grammar format. GFC was designed
|
|
||||||
to be a run-time format, but also to
|
|
||||||
support separate compilation of grammars, i.e.
|
|
||||||
to store the results of compiling
|
|
||||||
individual GF modules. But this means that GFC has to contain extra information,
|
|
||||||
such as type annotations, which is only needed in compilation and not at
|
|
||||||
run-time. In particular, the pattern matching syntax and semantics of GFC is
|
|
||||||
complex and therefore difficult to implement in new platforms.
|
|
||||||
|
|
||||||
The main differences of GFCC compared with GFC can be summarized as follows:
|
|
||||||
- there are no modules, and therefore no qualified names
|
|
||||||
- a GFCC grammar is multilingual, and consists of a common abstract syntax
|
|
||||||
together with one concrete syntax per language
|
|
||||||
- records and tables are replaced by arrays
|
|
||||||
- record labels and parameter values are replaced by integers
|
|
||||||
- record projection and table selection are replaced by array indexing
|
|
||||||
- there is (so far) no support for dependent types or higher-order abstract
|
|
||||||
syntax (which would be easy to add, but would make interpreters much more difficult
|
|
||||||
to write)
|
|
||||||
|
|
||||||
|
|
||||||
Here is an example of a GF grammar, consisting of three modules,
|
|
||||||
as translated to GFCC. The representations are aligned, with the exceptions
|
|
||||||
due to the alphabetical sorting of GFCC grammars.
|
|
||||||
```
|
|
||||||
grammar Ex(Eng,Swe);
|
|
||||||
|
|
||||||
abstract Ex = { abstract {
|
|
||||||
cat
|
|
||||||
S ; NP ; VP ;
|
|
||||||
fun
|
|
||||||
Pred : NP -> VP -> S ; Pred : NP,VP -> S = (Pred);
|
|
||||||
She, They : NP ; She : -> NP = (She);
|
|
||||||
Sleep : VP ; Sleep : -> VP = (Sleep);
|
|
||||||
They : -> NP = (They);
|
|
||||||
} } ;
|
|
||||||
|
|
||||||
concrete Eng of Ex = { concrete Eng {
|
|
||||||
lincat
|
|
||||||
S = {s : Str} ;
|
|
||||||
NP = {s : Str ; n : Num} ;
|
|
||||||
VP = {s : Num => Str} ;
|
|
||||||
param
|
|
||||||
Num = Sg | Pl ;
|
|
||||||
lin
|
|
||||||
Pred np vp = { Pred = [(($0!1),(($1!0)!($0!0)))];
|
|
||||||
s = np.s ++ vp.s ! np.n} ;
|
|
||||||
She = {s = "she" ; n = Sg} ; She = [0, "she"];
|
|
||||||
They = {s = "they" ; n = Pl} ;
|
|
||||||
Sleep = {s = table { Sleep = [("sleep" + ["s",""])];
|
|
||||||
Sg => "sleeps" ;
|
|
||||||
Pl => "sleep" They = [1, "they"];
|
|
||||||
} } ;
|
|
||||||
} ;
|
|
||||||
}
|
|
||||||
|
|
||||||
concrete Swe of Ex = { concrete Swe {
|
|
||||||
lincat
|
|
||||||
S = {s : Str} ;
|
|
||||||
NP = {s : Str} ;
|
|
||||||
VP = {s : Str} ;
|
|
||||||
param
|
|
||||||
Num = Sg | Pl ;
|
|
||||||
lin
|
|
||||||
Pred np vp = { Pred = [(($0!0),($1!0))];
|
|
||||||
s = np.s ++ vp.s} ;
|
|
||||||
She = {s = "hon"} ; She = ["hon"];
|
|
||||||
They = {s = "de"} ; They = ["de"];
|
|
||||||
Sleep = {s = "sover"} ; Sleep = ["sover"];
|
|
||||||
} } ;
|
|
||||||
```
|
|
||||||
|
|
||||||
==The syntax of GFCC files==
|
|
||||||
|
|
||||||
===Top level===
|
|
||||||
|
|
||||||
A grammar has a header telling the name of the abstract syntax
|
|
||||||
(often specifying an application domain), and the names of
|
|
||||||
the concrete languages. The abstract syntax and the concrete
|
|
||||||
syntaxes themselves follow.
|
|
||||||
```
|
|
||||||
Grammar ::= Header ";" Abstract ";" [Concrete] ;
|
|
||||||
Header ::= "grammar" CId "(" [CId] ")" ;
|
|
||||||
Abstract ::= "abstract" "{" [AbsDef] "}" ;
|
|
||||||
Concrete ::= "concrete" CId "{" [CncDef] "}" ;
|
|
||||||
```
|
|
||||||
Abstract syntax judgements give typings and semantic definitions.
|
|
||||||
Concrete syntax judgements give linearizations.
|
|
||||||
```
|
|
||||||
AbsDef ::= CId ":" Type "=" Exp ;
|
|
||||||
CncDef ::= CId "=" Term ;
|
|
||||||
```
|
|
||||||
Also flags are possible, local to each "module" (i.e. abstract and concretes).
|
|
||||||
```
|
|
||||||
AbsDef ::= "%" CId "=" String ;
|
|
||||||
CncDef ::= "%" CId "=" String ;
|
|
||||||
```
|
|
||||||
For the run-time system, the reference implementation in Haskell
|
|
||||||
uses a structure that gives efficient look-up:
|
|
||||||
```
|
|
||||||
data GFCC = GFCC {
|
|
||||||
absname :: CId ,
|
|
||||||
cncnames :: [CId] ,
|
|
||||||
abstract :: Abstr ,
|
|
||||||
concretes :: Map CId Concr
|
|
||||||
}
|
|
||||||
|
|
||||||
data Abstr = Abstr {
|
|
||||||
funs :: Map CId Type, -- find the type of a fun
|
|
||||||
cats :: Map CId [CId] -- find the funs giving a cat
|
|
||||||
}
|
|
||||||
|
|
||||||
type Concr = Map CId Term
|
|
||||||
```
|
|
||||||
|
|
||||||
|
|
||||||
===Abstract syntax===
|
|
||||||
|
|
||||||
Types are first-order function types built from
|
|
||||||
category symbols. Syntax trees (``Exp``) are
|
|
||||||
rose trees with the head (``Atom``) either a function
|
|
||||||
constant, a metavariable, or a string, integer, or float
|
|
||||||
literal.
|
|
||||||
```
|
|
||||||
Type ::= [CId] "->" CId ;
|
|
||||||
Exp ::= "(" Atom [Exp] ")" ;
|
|
||||||
Atom ::= CId ; -- function constant
|
|
||||||
Atom ::= "?" ; -- metavariable
|
|
||||||
Atom ::= String ; -- string literal
|
|
||||||
Atom ::= Integer ; -- integer literal
|
|
||||||
Atom ::= Double ; -- float literal
|
|
||||||
```
|
|
||||||
|
|
||||||
|
|
||||||
===Concrete syntax===
|
|
||||||
|
|
||||||
Linearization terms (``Term``) are built as follows.
|
|
||||||
Constructor names are shown to make the later code
|
|
||||||
examples readable.
|
|
||||||
```
|
|
||||||
R. Term ::= "[" [Term] "]" ; -- array
|
|
||||||
P. Term ::= "(" Term "!" Term ")" ; -- access to indexed field
|
|
||||||
S. Term ::= "(" [Term] ")" ; -- sequence with ++
|
|
||||||
K. Term ::= Tokn ; -- token
|
|
||||||
V. Term ::= "$" Integer ; -- argument
|
|
||||||
C. Term ::= Integer ; -- array index
|
|
||||||
FV. Term ::= "[|" [Term] "|]" ; -- free variation
|
|
||||||
TM. Term ::= "?" ; -- linearization of metavariable
|
|
||||||
```
|
|
||||||
Tokens are strings or (maybe obsolescent) prefix-dependent
|
|
||||||
variant lists.
|
|
||||||
```
|
|
||||||
KS. Tokn ::= String ;
|
|
||||||
KP. Tokn ::= "[" "pre" [String] "[" [Variant] "]" "]" ;
|
|
||||||
Var. Variant ::= [String] "/" [String] ;
|
|
||||||
```
|
|
||||||
Three special forms of terms are introduced by the compiler
|
|
||||||
as optimizations. They can in principle be eliminated, but
|
|
||||||
their presence makes grammars much more compact. Their semantics
|
|
||||||
will be explained in a later section.
|
|
||||||
```
|
|
||||||
F. Term ::= CId ; -- global constant
|
|
||||||
W. Term ::= "(" String "+" Term ")" ; -- prefix + suffix table
|
|
||||||
RP. Term ::= "(" Term "@" Term ")"; -- record parameter alias
|
|
||||||
```
|
|
||||||
Identifiers are like ``Ident`` in GF and GFC, except that
|
|
||||||
the compiler produces constants prefixed with ``_`` in
|
|
||||||
the common subterm elimination optimization.
|
|
||||||
```
|
|
||||||
token CId (('_' | letter) (letter | digit | '\'' | '_')*) ;
|
|
||||||
```
|
|
||||||
|
|
||||||
|
|
||||||
==The semantics of concrete syntax terms==
|
|
||||||
|
|
||||||
===Linearization and realization===
|
|
||||||
|
|
||||||
The linearization algorithm is essentially the same as in
|
|
||||||
GFC: a tree is linearized by evaluating its linearization term
|
|
||||||
in the environment of the linearizations of the subtrees.
|
|
||||||
Literal atoms are linearized in the obvious way.
|
|
||||||
The function also needs to know the language (i.e. concrete syntax)
|
|
||||||
in which linearization is performed.
|
|
||||||
```
|
|
||||||
linExp :: GFCC -> CId -> Exp -> Term
|
|
||||||
linExp mcfg lang tree@(Tr at trees) = case at of
|
|
||||||
AC fun -> comp (Prelude.map lin trees) $ look fun
|
|
||||||
AS s -> R [kks (show s)] -- quoted
|
|
||||||
AI i -> R [kks (show i)]
|
|
||||||
AF d -> R [kks (show d)]
|
|
||||||
AM -> TM
|
|
||||||
where
|
|
||||||
lin = linExp mcfg lang
|
|
||||||
comp = compute mcfg lang
|
|
||||||
look = lookLin mcfg lang
|
|
||||||
```
|
|
||||||
The result of linearization is usually a record, which is realized as
|
|
||||||
a string using the following algorithm.
|
|
||||||
```
|
|
||||||
realize :: Term -> String
|
|
||||||
realize trm = case trm of
|
|
||||||
R (t:_) -> realize t
|
|
||||||
S ss -> unwords $ Prelude.map realize ss
|
|
||||||
K (KS s) -> s
|
|
||||||
K (KP s _) -> unwords s ---- prefix choice TODO
|
|
||||||
W s t -> s ++ realize t
|
|
||||||
FV (t:_) -> realize t
|
|
||||||
TM -> "?"
|
|
||||||
```
|
|
||||||
Since the order of record fields is not necessarily
|
|
||||||
the same as in GF source,
|
|
||||||
this realization does not work securely for
|
|
||||||
categories whose lincats have more than one field.
|
|
||||||
|
|
||||||
|
|
||||||
===Term evaluation===
|
|
||||||
|
|
||||||
Evaluation follows call-by-value order, with two environments
|
|
||||||
needed:
|
|
||||||
- the grammar (a concrete syntax) to give the global constants
|
|
||||||
- an array of terms to give the subtree linearizations
|
|
||||||
|
|
||||||
|
|
||||||
The code is presented in one-level pattern matching, to
|
|
||||||
enable reimplementations in languages that do not permit
|
|
||||||
deep patterns (such as Java and C++).
|
|
||||||
```
|
|
||||||
compute :: GFCC -> CId -> [Term] -> Term -> Term
|
|
||||||
compute mcfg lang args = comp where
|
|
||||||
comp trm = case trm of
|
|
||||||
P r p -> proj (comp r) (comp p)
|
|
||||||
RP i t -> RP (comp i) (comp t)
|
|
||||||
W s t -> W s (comp t)
|
|
||||||
R ts -> R $ Prelude.map comp ts
|
|
||||||
V i -> idx args (fromInteger i) -- already computed
|
|
||||||
F c -> comp $ look c -- not computed (if contains V)
|
|
||||||
FV ts -> FV $ Prelude.map comp ts
|
|
||||||
S ts -> S $ Prelude.filter (/= S []) $ Prelude.map comp ts
|
|
||||||
_ -> trm
|
|
||||||
|
|
||||||
look = lookLin mcfg lang
|
|
||||||
|
|
||||||
idx xs i = xs !! i
|
|
||||||
|
|
||||||
proj r p = case (r,p) of
|
|
||||||
(_, FV ts) -> FV $ Prelude.map (proj r) ts
|
|
||||||
(W s t, _) -> kks (s ++ getString (proj t p))
|
|
||||||
_ -> comp $ getField r (getIndex p)
|
|
||||||
|
|
||||||
getString t = case t of
|
|
||||||
K (KS s) -> s
|
|
||||||
_ -> trace ("ERROR in grammar compiler: string from "++ show t) "ERR"
|
|
||||||
|
|
||||||
getIndex t = case t of
|
|
||||||
C i -> fromInteger i
|
|
||||||
RP p _ -> getIndex p
|
|
||||||
TM -> 0 -- default value for parameter
|
|
||||||
_ -> trace ("ERROR in grammar compiler: index from " ++ show t) 0
|
|
||||||
|
|
||||||
getField t i = case t of
|
|
||||||
R rs -> idx rs i
|
|
||||||
RP _ r -> getField r i
|
|
||||||
TM -> TM
|
|
||||||
_ -> trace ("ERROR in grammar compiler: field from " ++ show t) t
|
|
||||||
```
|
|
||||||
|
|
||||||
===The special term constructors===
|
|
||||||
|
|
||||||
The three forms introduced by the compiler may need a special
|
|
||||||
explanation.
|
|
||||||
|
|
||||||
Global constants
|
|
||||||
```
|
|
||||||
Term ::= CId ;
|
|
||||||
```
|
|
||||||
are shorthands for complex terms. They are produced by the
|
|
||||||
compiler by (iterated) common subexpression elimination.
|
|
||||||
They are often more powerful than hand-devised code sharing in the source
|
|
||||||
code. They could be computed off-line by replacing each identifier by
|
|
||||||
its definition.
|
|
||||||
|
|
||||||
Prefix-suffix tables
|
|
||||||
```
|
|
||||||
Term ::= "(" String "+" Term ")" ;
|
|
||||||
```
|
|
||||||
represent tables of word forms divided into the longest common prefix
|
|
||||||
and its array of suffixes. In the example grammar above, we have
|
|
||||||
```
|
|
||||||
Sleep = [("sleep" + ["s",""])]
|
|
||||||
```
|
|
||||||
which in fact is equal to the array of full forms
|
|
||||||
```
|
|
||||||
["sleeps", "sleep"]
|
|
||||||
```
|
|
||||||
The power of this construction comes from the fact that suffix sets
|
|
||||||
tend to be repeated in a language, and can therefore be collected
|
|
||||||
by common subexpression elimination. It is this technique that
|
|
||||||
explains the used syntax rather than the more accurate
|
|
||||||
```
|
|
||||||
"(" String "+" [String] ")"
|
|
||||||
```
|
|
||||||
since we want the suffix part to be a ``Term`` for the optimization to
|
|
||||||
take effect.
|
|
||||||
|
|
||||||
The most curious construct of GFCC is the parameter array alias,
|
|
||||||
```
|
|
||||||
Term ::= "(" Term "@" Term ")";
|
|
||||||
```
|
|
||||||
This form is used as the value of parameter records, such as the type
|
|
||||||
```
|
|
||||||
{n : Number ; p : Person}
|
|
||||||
```
|
|
||||||
The problem with parameter records is their double role.
|
|
||||||
They can be used like parameter values, as indices in selection,
|
|
||||||
```
|
|
||||||
VP.s ! {n = Sg ; p = P3}
|
|
||||||
```
|
|
||||||
but also as records, from which parameters can be projected:
|
|
||||||
```
|
|
||||||
{n = Sg ; p = P3}.n
|
|
||||||
```
|
|
||||||
Whichever use is selected as primary, a prohibitively complex
|
|
||||||
case expression must be generated at compilation to GFCC to get the
|
|
||||||
other use. The adopted
|
|
||||||
solution is to generate a pair containing both a parameter value index
|
|
||||||
and an array of indices of record fields. For instance, if we have
|
|
||||||
```
|
|
||||||
param Number = Sg | Pl ; Person = P1 | P2 | P3 ;
|
|
||||||
```
|
|
||||||
we get the encoding
|
|
||||||
```
|
|
||||||
{n = Sg ; p = P3} ---> (2 @ [0,2])
|
|
||||||
```
|
|
||||||
The GFCC computation rules are essentially
|
|
||||||
```
|
|
||||||
(t ! (i @ _)) = (t ! i)
|
|
||||||
((_ @ r) ! j) = (r ! j)
|
|
||||||
```
|
|
||||||
|
|
||||||
|
|
||||||
==Compiling to GFCC==
|
|
||||||
|
|
||||||
Compilation to GFCC is performed by the GF grammar compiler, and
|
|
||||||
GFCC interpreters need not know what it does. For grammar writers,
|
|
||||||
however, it might be interesting to know what happens to the grammars
|
|
||||||
in the process.
|
|
||||||
|
|
||||||
The compilation phases are the following
|
|
||||||
+ translate GF source to GFC, as always in GF
|
|
||||||
+ undo GFC back-end optimizations
|
|
||||||
+ perform the ``values`` optimization to normalize tables
|
|
||||||
+ create a symbol table mapping the GFC parameter and record types to
|
|
||||||
fixed-size arrays, and parameter values and record labels to integers
|
|
||||||
+ traverse the linearization rules replacing parameters and labels by integers
|
|
||||||
+ reorganize the created GFC grammar so that it has just one abstract syntax
|
|
||||||
and one concrete syntax per language
|
|
||||||
+ apply UTF8 encoding to the grammar, if not yet applied (this is told by the
|
|
||||||
``coding`` flag)
|
|
||||||
+ translate the GFC syntax tree to a GFCC syntax tree, using a simple
|
|
||||||
compositional mapping
|
|
||||||
+ perform the word-suffix optimization on GFCC linearization terms
|
|
||||||
+ perform subexpression elimination on each concrete syntax module
|
|
||||||
+ print out the GFCC code
|
|
||||||
|
|
||||||
|
|
||||||
Notice that a major part of the compilation is done within GFC, so that
|
|
||||||
GFC-related tasks (such as parser generation) could be performed by
|
|
||||||
using the old algorithms.
|
|
||||||
|
|
||||||
|
|
||||||
===Problems in GFCC compilation===
|
|
||||||
|
|
||||||
Two major problems had to be solved in compiling GFC to GFCC:
|
|
||||||
- consistent order of tables and records, to permit the array translation
|
|
||||||
- run-time variables in complex parameter values.
|
|
||||||
|
|
||||||
|
|
||||||
The current implementation is still experimental and may fail
|
|
||||||
to generate correct code. Any errors remaining are likely to be
|
|
||||||
related to the two problems just mentioned.
|
|
||||||
|
|
||||||
The order problem is solved in different ways for tables and records.
|
|
||||||
For tables, the ``values`` optimization of GFC already manages to
|
|
||||||
maintain a canonical order. But this order can be destroyed by the
|
|
||||||
``share`` optimization. To make sure that GFCC compilation works properly,
|
|
||||||
it is safest to recompile the GF grammar by using the ``values``
|
|
||||||
optimization flag.
|
|
||||||
|
|
||||||
Records can be canonically ordered by sorting them by labels.
|
|
||||||
In fact, this was done in connection with the GFCC work as a part
|
|
||||||
of the GFC generation, to guarantee consistency. This means that
|
|
||||||
e.g. the ``s`` field will in general no longer appear as the first
|
|
||||||
field, even if it does so in the GF source code. But relying on the
|
|
||||||
order of fields in a labelled record would be misplaced anyway.
|
|
||||||
|
|
||||||
The canonical form of records is further complicated by lock fields,
|
|
||||||
i.e. dummy fields of form ``lock_C = <>``, which are added to grammar
|
|
||||||
libraries to force intensionality of linearization types. The problem
|
|
||||||
is that the absence of a lock field only generates a warning, not
|
|
||||||
an error. Therefore a GFC grammar can contain objects of the same
|
|
||||||
type with and without a lock field. This problem was solved in GFCC
|
|
||||||
generation by just removing all lock fields (defined as fields whose
|
|
||||||
type is the empty record type). This has the further advantage of
|
|
||||||
(slightly) reducing the grammar size. More importantly, it is safe
|
|
||||||
to remove lock fields, because they are never used in computation,
|
|
||||||
and because intensional types are only needed in grammars reused
|
|
||||||
as libraries, not in grammars used at runtime.
|
|
||||||
|
|
||||||
While the order problem is rather bureaucratic in nature, run-time
|
|
||||||
variables are an interesting problem. They arise in the presence
|
|
||||||
of complex parameter values, created by argument-taking constructors
|
|
||||||
and parameter records. To give an example, consider the GF parameter
|
|
||||||
type system
|
|
||||||
```
|
|
||||||
Number = Sg | Pl ;
|
|
||||||
Person = P1 | P2 | P3 ;
|
|
||||||
Agr = Ag Number Person ;
|
|
||||||
```
|
|
||||||
The values can be translated to integers in the expected way,
|
|
||||||
```
|
|
||||||
Sg = 0, Pl = 1
|
|
||||||
P1 = 0, P2 = 1, P3 = 2
|
|
||||||
Ag Sg P1 = 0, Ag Sg P2 = 1, Ag Sg P3 = 2,
|
|
||||||
Ag Pl P1 = 3, Ag Pl P2 = 4, Ag Pl P3 = 5
|
|
||||||
```
|
|
||||||
However, an argument of ``Agr`` can be a run-time variable, as in
|
|
||||||
```
|
|
||||||
Ag np.n P3
|
|
||||||
```
|
|
||||||
This expression must first be translated to a case expression,
|
|
||||||
```
|
|
||||||
case np.n of {
|
|
||||||
0 => 2 ;
|
|
||||||
1 => 5
|
|
||||||
}
|
|
||||||
```
|
|
||||||
which can then be translated to the GFCC term
|
|
||||||
```
|
|
||||||
([2,5] ! ($0 ! 1))
|
|
||||||
```
|
|
||||||
assuming that the variable ``np`` is the first argument and that its
|
|
||||||
``Number`` field is the second in the record.
|
|
||||||
|
|
||||||
This transformation of course has to be performed recursively, since
|
|
||||||
there can be several run-time variables in a parameter value:
|
|
||||||
```
|
|
||||||
Ag np.n np.p
|
|
||||||
```
|
|
||||||
A similar transformation would be possible to deal with the double
|
|
||||||
role of parameter records discussed above. Thus the type
|
|
||||||
```
|
|
||||||
RNP = {n : Number ; p : Person}
|
|
||||||
```
|
|
||||||
could be uniformly translated into the set ``{0,1,2,3,4,5}``
|
|
||||||
as ``Agr`` above. Selections would be simple instances of indexing.
|
|
||||||
But any projection from the record should be translated into
|
|
||||||
a case expression,
|
|
||||||
```
|
|
||||||
rnp.n ===>
|
|
||||||
case rnp of {
|
|
||||||
0 => 0 ;
|
|
||||||
1 => 0 ;
|
|
||||||
2 => 0 ;
|
|
||||||
3 => 1 ;
|
|
||||||
4 => 1 ;
|
|
||||||
5 => 1
|
|
||||||
}
|
|
||||||
```
|
|
||||||
To avoid the code bloat resulting from this, we chose the alias representation
|
|
||||||
which is easy enough to deal with in interpreters.
|
|
||||||
|
|
||||||
|
|
||||||
===The representation of linearization types===
|
|
||||||
|
|
||||||
Linearization types (``lincat``) are not needed when generating with
|
|
||||||
GFCC, but they have been added to enable parser generation directly from
|
|
||||||
GFCC. The linearization type definitions are shown as a part of the
|
|
||||||
concrete syntax, by using terms to represent types. Here is the table
|
|
||||||
showing how different linearization types are encoded.
|
|
||||||
```
|
|
||||||
P* = size(P) -- parameter type
|
|
||||||
{_ : I ; __ : R}* = (I* @ R*) -- record of parameters
|
|
||||||
{r1 : T1 ; ... ; rn : Tn}* = [T1*,...,Tn*] -- other record
|
|
||||||
(P => T)* = [T* ,...,T*] -- size(P) times
|
|
||||||
Str* = ()
|
|
||||||
```
|
|
||||||
The category symbols are prefixed with two underscores (``__``).
|
|
||||||
For example, the linearization type ``present/CatEng.NP`` is
|
|
||||||
translated as follows:
|
|
||||||
```
|
|
||||||
NP = {
|
|
||||||
a : { -- 6 = 2*3 values
|
|
||||||
n : {ParamX.Number} ; -- 2 values
|
|
||||||
p : {ParamX.Person} -- 3 values
|
|
||||||
} ;
|
|
||||||
s : {ResEng.Case} => Str -- 3 values
|
|
||||||
}
|
|
||||||
|
|
||||||
__NP = [(6@[2,3]),[(),(),()]]
|
|
||||||
```
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
===Running the compiler and the GFCC interpreter===
|
|
||||||
|
|
||||||
GFCC generation is a part of the
|
|
||||||
[developers' version http://www.cs.chalmers.se/Cs/Research/Language-technology/darcs/GF/doc/darcs.html]
|
|
||||||
of GF since September 2006. To invoke the compiler, the flag
|
|
||||||
``-printer=gfcc`` to the command
|
|
||||||
``pm = print_multi`` is used. It is wise to recompile the grammar from
|
|
||||||
source, since previously compiled libraries may not obey the canonical
|
|
||||||
order of records. Running ``strip`` on the grammar before
|
|
||||||
GFCC translation removes unnecessary interface references.
|
|
||||||
Here is an example, performed in
|
|
||||||
[example/bronzeage ../../../../../examples/bronzeage].
|
|
||||||
```
|
|
||||||
i -src -path=.:prelude:resource-1.0/* -optimize=all_subs BronzeageEng.gf
|
|
||||||
i -src -path=.:prelude:resource-1.0/* -optimize=all_subs BronzeageGer.gf
|
|
||||||
strip
|
|
||||||
pm -printer=gfcc | wf bronze.gfcc
|
|
||||||
```
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
==The reference interpreter==
|
|
||||||
|
|
||||||
The reference interpreter written in Haskell consists of the following files:
|
|
||||||
```
|
|
||||||
-- source file for BNFC
|
|
||||||
GFCC.cf -- labelled BNF grammar of gfcc
|
|
||||||
|
|
||||||
-- files generated by BNFC
|
|
||||||
AbsGFCC.hs -- abstract syntax of gfcc
|
|
||||||
ErrM.hs -- error monad used internally
|
|
||||||
LexGFCC.hs -- lexer of gfcc files
|
|
||||||
ParGFCC.hs -- parser of gfcc files and syntax trees
|
|
||||||
PrintGFCC.hs -- printer of gfcc files and syntax trees
|
|
||||||
|
|
||||||
-- hand-written files
|
|
||||||
DataGFCC.hs -- post-parser grammar creation, linearization and evaluation
|
|
||||||
GenGFCC.hs -- random and exhaustive generation, generate-and-test parsing
|
|
||||||
RunGFCC.hs -- main function - a simple command interpreter
|
|
||||||
```
|
|
||||||
It is included in the
|
|
||||||
[developers' version http://www.cs.chalmers.se/Cs/Research/Language-technology/darcs/GF/doc/darcs.html]
|
|
||||||
of GF, in the subdirectory [``GF/src/GF/Canon/GFCC`` ../].
|
|
||||||
|
|
||||||
To compile the interpreter, type
|
|
||||||
```
|
|
||||||
make gfcc
|
|
||||||
```
|
|
||||||
in ``GF/src``. To run it, type
|
|
||||||
```
|
|
||||||
./gfcc <GFCC-file>
|
|
||||||
```
|
|
||||||
The available commands are
|
|
||||||
- ``gr <Cat> <Int>``: generate a number of random trees in category,
|
|
||||||
and show their linearizations in all languages
|
|
||||||
- ``grt <Cat> <Int>``: generate a number of random trees in category,
|
|
||||||
and show the trees and their linearizations in all languages
|
|
||||||
- ``gt <Cat> <Int>``: generate a number of trees in category from smallest,
|
|
||||||
and show their linearizations in all languages
|
|
||||||
- ``gtt <Cat> <Int>``: generate a number of trees in category from smallest,
|
|
||||||
and show the trees and their linearizations in all languages
|
|
||||||
- ``p <Int> <Cat> <String>``: "parse", i.e. generate trees until match or
|
|
||||||
until the given number have been generated
|
|
||||||
- ``<Tree>``: linearize tree in all languages, also showing full records
|
|
||||||
- ``quit``: terminate the system cleanly
|
|
||||||
|
|
||||||
|
|
||||||
==Interpreter in C++==
|
|
||||||
|
|
||||||
A base-line interpreter in C++ has been started.
|
|
||||||
Its main functionality is random generation of trees and linearization of them.
|
|
||||||
|
|
||||||
Here are some results from running the different interpreters, compared
|
|
||||||
to running the same grammar in GF, saved in ``.gfcm`` format.
|
|
||||||
The grammar contains the English, German, and Norwegian
|
|
||||||
versions of Bronzeage. The experiment was carried out on
|
|
||||||
an Ubuntu Linux laptop with a 1.5 GHz Intel Centrino processor.
|
|
||||||
|
|
||||||
|| | GF | gfcc(hs) | gfcc++ |
|
|
||||||
| program size | 7249k | 803k | 113k
|
|
||||||
| grammar size | 336k | 119k | 119k
|
|
||||||
| read grammar | 1150ms | 510ms | 100ms
|
|
||||||
| generate 222 | 9500ms | 450ms | 800ms
|
|
||||||
| memory | 21M | 10M | 20M
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
To summarize:
|
|
||||||
- going from GF to gfcc is a major win in both code size and efficiency
|
|
||||||
- going from Haskell to C++ interpreter is not a win yet, because of a space
|
|
||||||
leak in the C++ version
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
==Some things to do==
|
|
||||||
|
|
||||||
Interpreter in Java.
|
|
||||||
|
|
||||||
Parsing via MCFG
|
|
||||||
- the FCFG format can possibly be simplified
|
|
||||||
- parser grammars should be saved in files to make interpreters easier
|
|
||||||
|
|
||||||
|
|
||||||
Hand-written parsers for GFCC grammars to reduce code size
|
|
||||||
(and efficiency?) of interpreters.
|
|
||||||
|
|
||||||
Binary format and/or file compression of GFCC output.
|
|
||||||
|
|
||||||
Syntax editor based on GFCC.
|
|
||||||
|
|
||||||
Rewriting of resource libraries in order to exploit the
|
|
||||||
word-suffix sharing better (depth-one tables, as in FM).
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -1,180 +0,0 @@
|
|||||||
GFCC Syntax
|
|
||||||
|
|
||||||
|
|
||||||
==Syntax of GFCC files==
|
|
||||||
|
|
||||||
The parser syntax is very simple, as defined in BNF:
|
|
||||||
```
|
|
||||||
Grm. Grammar ::= [RExp] ;
|
|
||||||
|
|
||||||
App. RExp ::= "(" CId [RExp] ")" ;
|
|
||||||
AId. RExp ::= CId ;
|
|
||||||
AInt. RExp ::= Integer ;
|
|
||||||
AStr. RExp ::= String ;
|
|
||||||
AFlt. RExp ::= Double ;
|
|
||||||
AMet. RExp ::= "?" ;
|
|
||||||
|
|
||||||
terminator RExp "" ;
|
|
||||||
|
|
||||||
token CId (('_' | letter) (letter | digit | '\'' | '_')*) ;
|
|
||||||
```
|
|
||||||
While a parser and a printer can be generated for many languages
|
|
||||||
from this grammar by using the BNF Converter, a parser is also
|
|
||||||
easy to write by hand using recursive descent.
|
|
||||||
|
|
||||||
|
|
||||||
==Syntax of well-formed GFCC code==
|
|
||||||
|
|
||||||
Here is a summary of well-formed syntax,
|
|
||||||
with a comment on the semantics of each construction.
|
|
||||||
```
|
|
||||||
Grammar ::=
|
|
||||||
("grammar" CId CId*) -- abstract syntax name and concrete syntax names
|
|
||||||
"(" "flags" Flag* ")" -- global and abstract flags
|
|
||||||
"(" "abstract" Abstract ")" -- abstract syntax
|
|
||||||
"(" "concrete" Concrete* ")" -- concrete syntaxes
|
|
||||||
|
|
||||||
Abstract ::=
|
|
||||||
"(" "fun" FunDef* ")" -- function definitions
|
|
||||||
"(" "cat" CatDef* ")" -- category definitions
|
|
||||||
|
|
||||||
Concrete ::=
|
|
||||||
"(" CId -- language name
|
|
||||||
"flags" Flag* -- concrete flags
|
|
||||||
"lin" LinDef* -- linearization rules
|
|
||||||
"oper" LinDef* -- operations (macros)
|
|
||||||
"lincat" LinDef* -- linearization type definitions
|
|
||||||
"lindef" LinDef* -- linearization default definitions
|
|
||||||
"printname" LinDef* -- printname definitions
|
|
||||||
"param" LinDef* -- lincats with labels and parameter value names
|
|
||||||
")"
|
|
||||||
|
|
||||||
Flag ::= "(" CId String ")" -- flag and value
|
|
||||||
FunDef ::= "(" CId Type Exp ")" -- function, type, and definition
|
|
||||||
CatDef ::= "(" CId Hypo* ")" -- category and context
|
|
||||||
LinDef ::= "(" CId Term ")" -- function and definition
|
|
||||||
|
|
||||||
Type ::=
|
|
||||||
"(" CId -- value category
|
|
||||||
"(" "H" Hypo* ")" -- argument context
|
|
||||||
"(" "X" Exp* ")" ")" -- arguments (of dependent value type)
|
|
||||||
|
|
||||||
Exp ::=
|
|
||||||
"(" CId -- function
|
|
||||||
"(" "B" CId* ")" -- bindings
|
|
||||||
"(" "X" Exp* ")" ")" -- arguments
|
|
||||||
| CId -- variable
|
|
||||||
| "?" -- metavariable
|
|
||||||
| "(" "Eq" Equation* ")" -- group of pattern equations
|
|
||||||
| Integer -- integer literal (non-negative)
|
|
||||||
| Float -- floating-point literal (non-negative)
|
|
||||||
| String -- string literal (in double quotes)
|
|
||||||
|
|
||||||
Hypo ::= "(" CId Type ")" -- variable and type
|
|
||||||
|
|
||||||
Equation ::= "(" "E" Exp Exp* ")" -- value and pattern list
|
|
||||||
|
|
||||||
Term ::=
|
|
||||||
"(" "R" Term* ")" -- array (record or table)
|
|
||||||
| "(" "S" Term* ")" -- concatenated sequence
|
|
||||||
| "(" "FV" Term* ")" -- free variant list
|
|
||||||
| "(" "P" Term Term ")" -- access to index (projection or selection)
|
|
||||||
| "(" "W" String Term ")" -- token prefix with suffix list
|
|
||||||
| "(" "A" Integer ")" -- pointer to subtree
|
|
||||||
| String -- token (in double quotes)
|
|
||||||
| Integer -- index in array
|
|
||||||
| CId -- macro constant
|
|
||||||
| "?" -- metavariable
|
|
||||||
```
|
|
||||||
|
|
||||||
|
|
||||||
==GFCC interpreter==
|
|
||||||
|
|
||||||
The first phase in interpreting GFCC is to parse a GFCC file and
|
|
||||||
build an internal abstract syntax representation, as specified
|
|
||||||
in the previous section.
|
|
||||||
|
|
||||||
With this representation, linearization can be performed by
|
|
||||||
a straightforward function from expressions (``Exp``) to terms
|
|
||||||
(``Term``). All expressions except groups of pattern equations
|
|
||||||
can be linearized.
|
|
||||||
|
|
||||||
Here is a reference Haskell implementation of linearization:
|
|
||||||
```
|
|
||||||
linExp :: GFCC -> CId -> Exp -> Term
|
|
||||||
linExp gfcc lang tree@(DTr _ at trees) = case at of
|
|
||||||
AC fun -> comp (map lin trees) $ look fun
|
|
||||||
AS s -> R [K (show s)] -- quoted
|
|
||||||
AI i -> R [K (show i)]
|
|
||||||
AF d -> R [K (show d)]
|
|
||||||
AM -> TM
|
|
||||||
where
|
|
||||||
lin = linExp gfcc lang
|
|
||||||
comp = compute gfcc lang
|
|
||||||
look = lookLin gfcc lang
|
|
||||||
```
|
|
||||||
TODO: bindings must be supported.
|
|
||||||
|
|
||||||
Terms resulting from linearization are evaluated in
|
|
||||||
call-by-value order, with two environments needed:
|
|
||||||
- the grammar (a concrete syntax) to give the global constants
|
|
||||||
- an array of terms to give the subtree linearizations
|
|
||||||
|
|
||||||
|
|
||||||
The Haskell implementation works as follows:
|
|
||||||
```
|
|
||||||
compute :: GFCC -> CId -> [Term] -> Term -> Term
|
|
||||||
compute gfcc lang args = comp where
|
|
||||||
comp trm = case trm of
|
|
||||||
P r p -> proj (comp r) (comp p)
|
|
||||||
W s t -> W s (comp t)
|
|
||||||
R ts -> R $ map comp ts
|
|
||||||
V i -> idx args (fromInteger i) -- already computed
|
|
||||||
F c -> comp $ look c -- not computed (if contains V)
|
|
||||||
FV ts -> FV $ Prelude.map comp ts
|
|
||||||
S ts -> S $ Prelude.filter (/= S []) $ Prelude.map comp ts
|
|
||||||
_ -> trm
|
|
||||||
|
|
||||||
look = lookOper gfcc lang
|
|
||||||
|
|
||||||
idx xs i = xs !! i
|
|
||||||
|
|
||||||
proj r p = case (r,p) of
|
|
||||||
(_, FV ts) -> FV $ Prelude.map (proj r) ts
|
|
||||||
(FV ts, _ ) -> FV $ Prelude.map (\t -> proj t p) ts
|
|
||||||
(W s t, _) -> kks (s ++ getString (proj t p))
|
|
||||||
_ -> comp $ getField r (getIndex p)
|
|
||||||
|
|
||||||
getString t = case t of
|
|
||||||
K (KS s) -> s
|
|
||||||
_ -> trace ("ERROR in grammar compiler: string from "++ show t) "ERR"
|
|
||||||
|
|
||||||
getIndex t = case t of
|
|
||||||
C i -> fromInteger i
|
|
||||||
RP p _ -> getIndex p
|
|
||||||
TM -> 0 -- default value for parameter
|
|
||||||
_ -> trace ("ERROR in grammar compiler: index from " ++ show t) 0
|
|
||||||
|
|
||||||
getField t i = case t of
|
|
||||||
R rs -> idx rs i
|
|
||||||
RP _ r -> getField r i
|
|
||||||
TM -> TM
|
|
||||||
_ -> trace ("ERROR in grammar compiler: field from " ++ show t) t
|
|
||||||
```
|
|
||||||
The result of linearization is usually a record, which is realized as
|
|
||||||
a string using the following algorithm.
|
|
||||||
```
|
|
||||||
realize :: Term -> String
|
|
||||||
realize trm = case trm of
|
|
||||||
R (t:_) -> realize t
|
|
||||||
S ss -> unwords $ map realize ss
|
|
||||||
K s -> s
|
|
||||||
W s t -> s ++ realize t
|
|
||||||
FV (t:_) -> realize t -- TODO: all variants
|
|
||||||
TM -> "?"
|
|
||||||
```
|
|
||||||
Notice that realization always picks the first field of a record.
|
|
||||||
If a linearization type has more than one field, the first field
|
|
||||||
does not necessarily contain the desired string.
|
|
||||||
Also notice that the order of record fields in GFCC is not necessarily
|
|
||||||
the same as in GF source.
|
|
||||||
@@ -1,153 +0,0 @@
|
|||||||
Procedure for making a GF release:
|
|
||||||
|
|
||||||
1. Make sure everything that should be in the release has been
|
|
||||||
checked in.
|
|
||||||
|
|
||||||
2. Go to the src/ dir.
|
|
||||||
|
|
||||||
$ cd src
|
|
||||||
|
|
||||||
3. Edit configure.ac to set the right version number
|
|
||||||
(the second argument to the AC_INIT macro).
|
|
||||||
|
|
||||||
4. Edit gf.spec to set the version and release numbers
|
|
||||||
(change %define version and %define release).
|
|
||||||
|
|
||||||
5. Commit configure.ac and gf.spec:
|
|
||||||
|
|
||||||
$ darcs record -m 'Updated version numbers.' configure.ac gf.spec
|
|
||||||
|
|
||||||
6. Run autoconf to generate configure with the right version number:
|
|
||||||
|
|
||||||
$ autoconf
|
|
||||||
|
|
||||||
7. Go back to the root of the tree.
|
|
||||||
|
|
||||||
$ cd ..
|
|
||||||
|
|
||||||
8. Tag the release. (X_X should be replaced by the version number, with
|
|
||||||
_ instead of ., e.g. 2_0)
|
|
||||||
|
|
||||||
$ darcs tag -m RELEASE-X_X
|
|
||||||
|
|
||||||
9. Push the changes that you made for the release to the main repo:
|
|
||||||
|
|
||||||
$ darcs push
|
|
||||||
|
|
||||||
10. Build a source package:
|
|
||||||
|
|
||||||
$ cd src
|
|
||||||
$ ./configure
|
|
||||||
$ make dist
|
|
||||||
|
|
||||||
11. (Only if releasing a new grammars distribution)
|
|
||||||
Build a grammar tarball:
|
|
||||||
|
|
||||||
$ cd src
|
|
||||||
$ ./configure && make grammar-dist
|
|
||||||
|
|
||||||
12. Build an x86/linux RPM (should be done on a Mandrake Linux box):
|
|
||||||
|
|
||||||
Setup for building RPMs (first time only):
|
|
||||||
|
|
||||||
- Make sure that you have the directories necessary to build
|
|
||||||
RPMs:
|
|
||||||
|
|
||||||
$ mkdir -p ~/rpm/{BUILD,RPMS/i586,RPMS/noarch,SOURCES,SRPMS,SPECS,tmp}
|
|
||||||
|
|
||||||
- Create ~/.rpmrc with the following contents:
|
|
||||||
|
|
||||||
buildarchtranslate: i386: i586
|
|
||||||
buildarchtranslate: i486: i586
|
|
||||||
buildarchtranslate: i586: i586
|
|
||||||
buildarchtranslate: i686: i586
|
|
||||||
|
|
||||||
- Create ~/.rpmmacros with the following contents:
|
|
||||||
|
|
||||||
%_topdir %(echo ${HOME}/rpm)
|
|
||||||
%_tmppath %{_topdir}/tmp
|
|
||||||
|
|
||||||
%packager Your Name <yourusername@cs.chalmers.se>
|
|
||||||
|
|
||||||
Build the RPM:
|
|
||||||
|
|
||||||
$ cd src
|
|
||||||
$ ./configure && make rpm
|
|
||||||
|
|
||||||
13. Build a generic binary x86/linux package (should be done on a Linux box,
|
|
||||||
e.g. banded.medic.chalmers.se):
|
|
||||||
|
|
||||||
$ cd src
|
|
||||||
$ ./configure --host=i386-pc-linux-gnu && make binary-dist
|
|
||||||
|
|
||||||
14. Build a generic binary sparc/solaris package (should be done
|
|
||||||
on a Solaris box, e.g. remote1.cs.chalmers.se):
|
|
||||||
|
|
||||||
$ cd src
|
|
||||||
$ ./configure --host=sparc-sun-solaris2 && gmake binary-dist
|
|
||||||
|
|
||||||
15. Build a Mac OS X package (should be done on a Mac OS X box,
|
|
||||||
e.g. csmisc99.cs.chalmers.se):
|
|
||||||
|
|
||||||
$ cd src
|
|
||||||
$ ./configure && make binary-dist
|
|
||||||
|
|
||||||
Note that to run GHC-compiled binaries on OS X, you need
|
|
||||||
a "Haskell Support Framework". This should be available
|
|
||||||
separately from the GF download page.
|
|
||||||
|
|
||||||
TODO: Use OS X PackageMaker to build a .pkg-file which can
|
|
||||||
be installed using the standard OS X Installer program.
|
|
||||||
|
|
||||||
16. Build a binary Cygwin package (should be done on a Windows
|
|
||||||
machine with Cygwin):
|
|
||||||
|
|
||||||
$ cd src
|
|
||||||
$ ./configure && make binary-dist
|
|
||||||
|
|
||||||
17. Build a Windows MSI package (FIXME: This doesn't work right,
|
|
||||||
pathnames with backslashes and spaces are not handled
|
|
||||||
correctly in Windows. We only release a binary tarball
|
|
||||||
for Cygwin right now.):
|
|
||||||
|
|
||||||
$ cd src
|
|
||||||
$ ./configure && make all windows-msi
|
|
||||||
|
|
||||||
18. Add new GF package release to SourceForge:
|
|
||||||
|
|
||||||
- https://sourceforge.net/projects/gf-tools
|
|
||||||
|
|
||||||
- Project page -> Admin -> File releases -> Add release (for the
|
|
||||||
GF package)
|
|
||||||
|
|
||||||
- New release name: X.X (just the version number, e.g. 2.2)
|
|
||||||
|
|
||||||
- Paste in release notes
|
|
||||||
|
|
||||||
- Upload files using anonymous FTP to upload.sourceforge.net
|
|
||||||
in the incoming directory.
|
|
||||||
|
|
||||||
- Add the files to the release and set the processor
|
|
||||||
and file type for each file (remember to press
|
|
||||||
Update/Refresh for each file):
|
|
||||||
* x86 rpm -> i386/.rpm
|
|
||||||
* source rpm -> Any/Source .rpm
|
|
||||||
* x86 binary tarball -> i386/.gz
|
|
||||||
* sparc binary tarball -> Sparc/.gz
|
|
||||||
* source package -> Any/Source .gz
|
|
||||||
|
|
||||||
19. Add new GF-editor release. Repeat the steps above, but
|
|
||||||
with GF-editor:
|
|
||||||
|
|
||||||
- Add files and set properties:
|
|
||||||
|
|
||||||
* editor rpm -> i386/.rpm (not really true, but I haven't
|
|
||||||
figured out how to make noarch rpms from the same spec as
|
|
||||||
arch-specific ones)
|
|
||||||
|
|
||||||
20. Mail to gf-tools-users@lists.sourceforge.net
|
|
||||||
|
|
||||||
21. Update website.
|
|
||||||
|
|
||||||
22. Party!
|
|
||||||
|
|
||||||
@@ -1,967 +0,0 @@
|
|||||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
|
||||||
<HTML>
|
|
||||||
<HEAD>
|
|
||||||
<META NAME="generator" CONTENT="http://txt2tags.sf.net">
|
|
||||||
<TITLE>Resource grammar writing HOWTO</TITLE>
|
|
||||||
</HEAD><BODY BGCOLOR="white" TEXT="black">
|
|
||||||
<P ALIGN="center"><CENTER><H1>Resource grammar writing HOWTO</H1>
|
|
||||||
<FONT SIZE="4">
|
|
||||||
<I>Author: Aarne Ranta &lt;aarne (at) cs.chalmers.se&gt;</I><BR>
|
|
||||||
Last update: Mon Sep 22 14:28:01 2008
|
|
||||||
</FONT></CENTER>
|
|
||||||
|
|
||||||
<P></P>
|
|
||||||
<HR NOSHADE SIZE=1>
|
|
||||||
<P></P>
|
|
||||||
<UL>
|
|
||||||
<LI><A HREF="#toc1">The resource grammar structure</A>
|
|
||||||
<UL>
|
|
||||||
<LI><A HREF="#toc2">Library API modules</A>
|
|
||||||
<LI><A HREF="#toc3">Phrase category modules</A>
|
|
||||||
<LI><A HREF="#toc4">Infrastructure modules</A>
|
|
||||||
<LI><A HREF="#toc5">Lexical modules</A>
|
|
||||||
</UL>
|
|
||||||
<LI><A HREF="#toc6">Language-dependent syntax modules</A>
|
|
||||||
<UL>
|
|
||||||
<LI><A HREF="#toc7">The present-tense fragment</A>
|
|
||||||
</UL>
|
|
||||||
<LI><A HREF="#toc8">Phases of the work</A>
|
|
||||||
<UL>
|
|
||||||
<LI><A HREF="#toc9">Putting up a directory</A>
|
|
||||||
<LI><A HREF="#toc10">Direction of work</A>
|
|
||||||
<LI><A HREF="#toc11">The develop-test cycle</A>
|
|
||||||
<LI><A HREF="#toc12">Auxiliary modules</A>
|
|
||||||
<LI><A HREF="#toc13">Morphology and lexicon</A>
|
|
||||||
<LI><A HREF="#toc14">Lock fields</A>
|
|
||||||
<LI><A HREF="#toc15">Lexicon construction</A>
|
|
||||||
</UL>
|
|
||||||
<LI><A HREF="#toc16">Lexicon extension</A>
|
|
||||||
<UL>
|
|
||||||
<LI><A HREF="#toc17">The irregularity lexicon</A>
|
|
||||||
<LI><A HREF="#toc18">Lexicon extraction from a word list</A>
|
|
||||||
<LI><A HREF="#toc19">Lexicon extraction from raw text data</A>
|
|
||||||
<LI><A HREF="#toc20">Bootstrapping with smart paradigms</A>
|
|
||||||
</UL>
|
|
||||||
<LI><A HREF="#toc21">Extending the resource grammar API</A>
|
|
||||||
<LI><A HREF="#toc22">Using parametrized modules</A>
|
|
||||||
<UL>
|
|
||||||
<LI><A HREF="#toc23">Writing an instance of parametrized resource grammar implementation</A>
|
|
||||||
<LI><A HREF="#toc24">Parametrizing a resource grammar implementation</A>
|
|
||||||
</UL>
|
|
||||||
<LI><A HREF="#toc25">Character encoding and transliterations</A>
|
|
||||||
<LI><A HREF="#toc26">Coding conventions in GF</A>
|
|
||||||
<LI><A HREF="#toc27">Transliterations</A>
|
|
||||||
</UL>
|
|
||||||
|
|
||||||
<P></P>
|
|
||||||
<HR NOSHADE SIZE=1>
|
|
||||||
<P></P>
|
|
||||||
<P>
|
|
||||||
<B>History</B>
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
September 2008: updated for Version 1.5.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
October 2007: updated for Version 1.2.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
January 2006: first version.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
The purpose of this document is to tell how to implement the GF
|
|
||||||
resource grammar API for a new language. We will <I>not</I> cover how
|
|
||||||
to use the resource grammar, nor how to change the API. But we
|
|
||||||
will give some hints how to extend the API.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
A manual for using the resource grammar is found in
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
<A HREF="../lib/resource/doc/synopsis.html"><CODE>www.cs.chalmers.se/Cs/Research/Language-technology/GF/lib/resource/doc/synopsis.html</CODE></A>.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
A tutorial on GF, also introducing the idea of resource grammars, is found in
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
<A HREF="./gf-tutorial.html"><CODE>www.cs.chalmers.se/Cs/Research/Language-technology/GF/doc/gf-tutorial.html</CODE></A>.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
This document concerns the API v. 1.5, while the current stable release is 1.4.
|
|
||||||
You can find the code for the stable release in
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
<A HREF="../lib/resource"><CODE>www.cs.chalmers.se/Cs/Research/Language-technology/GF/lib/resource/</CODE></A>
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
and the next release in
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
<A HREF="../next-lib/src"><CODE>www.cs.chalmers.se/Cs/Research/Language-technology/GF/next-lib/src/</CODE></A>
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
It is recommended to build new grammars to match the next release.
|
|
||||||
</P>
|
|
||||||
<A NAME="toc1"></A>
|
|
||||||
<H2>The resource grammar structure</H2>
|
|
||||||
<P>
|
|
||||||
The library is divided into a bunch of modules, whose dependencies
|
|
||||||
are given in the following figure.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
<IMG ALIGN="left" SRC="Syntax.png" BORDER="0" ALT="">
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
Modules of different kinds are distinguished as follows:
|
|
||||||
</P>
|
|
||||||
<UL>
|
|
||||||
<LI>solid contours: module seen by end users
|
|
||||||
<LI>dashed contours: internal module
|
|
||||||
<LI>ellipse: abstract/concrete pair of modules
|
|
||||||
<LI>rectangle: resource or instance
|
|
||||||
<LI>diamond: interface
|
|
||||||
</UL>
|
|
||||||
|
|
||||||
<P>
|
|
||||||
Put in another way:
|
|
||||||
</P>
|
|
||||||
<UL>
|
|
||||||
<LI>solid rectangles and diamonds: user-accessible library API
|
|
||||||
<LI>solid ellipses: user-accessible top-level grammar for parsing and linearization
|
|
||||||
<LI>dashed contours: not visible to users
|
|
||||||
</UL>
|
|
||||||
|
|
||||||
<P>
|
|
||||||
The dashed ellipses form the main parts of the implementation, on which the resource
|
|
||||||
grammar programmer has to work. She also has to work on the <CODE>Paradigms</CODE>
|
|
||||||
module. The rest of the modules can be produced mechanically from corresponding
|
|
||||||
modules for other languages, by just changing the language codes appearing in
|
|
||||||
their module headers.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
The module structure is rather flat: most modules are direct
|
|
||||||
parents of <CODE>Grammar</CODE>. The idea
|
|
||||||
is that the implementors can concentrate on one linguistic aspect at a time, or
|
|
||||||
also distribute the work among several authors. The module <CODE>Cat</CODE>
|
|
||||||
defines the "glue" that ties the aspects together - a type system
|
|
||||||
to which all the other modules conform, so that e.g. <CODE>NP</CODE> means
|
|
||||||
the same thing in those modules that use <CODE>NP</CODE>s and those that
|
|
||||||
construct them.
|
|
||||||
</P>
|
|
||||||
<A NAME="toc2"></A>
|
|
||||||
<H3>Library API modules</H3>
|
|
||||||
<P>
|
|
||||||
For the user of the library, these modules are the most important ones.
|
|
||||||
In a typical application, it is enough to open <CODE>Paradigms</CODE> and <CODE>Syntax</CODE>.
|
|
||||||
The module <CODE>Try</CODE> combines these two, making it possible to experiment
|
|
||||||
with combinations of syntactic and lexical constructors by using the
|
|
||||||
<CODE>cc</CODE> command in the GF shell. Here are short explanations of each API module:
|
|
||||||
</P>
|
|
||||||
<UL>
|
|
||||||
<LI><CODE>Try</CODE>: the whole resource library for a language (<CODE>Paradigms</CODE>, <CODE>Syntax</CODE>,
|
|
||||||
<CODE>Irreg</CODE>, and <CODE>Extra</CODE>);
|
|
||||||
produced mechanically as a collection of modules
|
|
||||||
<LI><CODE>Syntax</CODE>: language-independent categories, syntax functions, and structural words;
|
|
||||||
produced mechanically as a collection of modules
|
|
||||||
<LI><CODE>Constructors</CODE>: language-independent syntax functions and structural words;
|
|
||||||
produced mechanically via functor instantiation
|
|
||||||
<LI><CODE>Paradigms</CODE>: language-dependent morphological paradigms
|
|
||||||
</UL>
|
|
||||||
|
|
||||||
<A NAME="toc3"></A>
|
|
||||||
<H3>Phrase category modules</H3>
|
|
||||||
<P>
|
|
||||||
The immediate parents of <CODE>Grammar</CODE> will be called <B>phrase category modules</B>,
|
|
||||||
since each of them concentrates on a particular phrase category (nouns, verbs,
|
|
||||||
adjectives, sentences,...). A phrase category module tells
|
|
||||||
<I>how to construct phrases in that category</I>. You will find out that
|
|
||||||
all functions in any of these modules have the same value type (or maybe
|
|
||||||
one of a small number of different types). Thus we have
|
|
||||||
</P>
|
|
||||||
<UL>
|
|
||||||
<LI><CODE>Noun</CODE>: construction of nouns and noun phrases
|
|
||||||
<LI><CODE>Adjective</CODE>: construction of adjectival phrases
|
|
||||||
<LI><CODE>Verb</CODE>: construction of verb phrases
|
|
||||||
<LI><CODE>Adverb</CODE>: construction of adverbial phrases
|
|
||||||
<LI><CODE>Numeral</CODE>: construction of cardinal and ordinal numerals
|
|
||||||
<LI><CODE>Sentence</CODE>: construction of sentences and imperatives
|
|
||||||
<LI><CODE>Question</CODE>: construction of questions
|
|
||||||
<LI><CODE>Relative</CODE>: construction of relative clauses
|
|
||||||
<LI><CODE>Conjunction</CODE>: coordination of phrases
|
|
||||||
<LI><CODE>Phrase</CODE>: construction of the major units of text and speech
|
|
||||||
<LI><CODE>Text</CODE>: construction of texts as sequences of phrases
|
|
||||||
<LI><CODE>Idiom</CODE>: idiomatic expressions such as existentials
|
|
||||||
</UL>
|
|
||||||
|
|
||||||
<A NAME="toc4"></A>
|
|
||||||
<H3>Infrastructure modules</H3>
|
|
||||||
<P>
|
|
||||||
Expressions of each phrase category are constructed in the corresponding
|
|
||||||
phrase category module. But their <I>use</I> mostly takes place in other modules.
|
|
||||||
For instance, noun phrases, which are constructed in <CODE>Noun</CODE>, are
|
|
||||||
used as arguments of functions of almost all other phrase category modules.
|
|
||||||
How can we build all these modules independently of each other?
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
As usual in typeful programming, the <I>only</I> thing you need to know
|
|
||||||
about an object you use is its type. When writing a linearization rule
|
|
||||||
for a GF abstract syntax function, the only thing you need to know is
|
|
||||||
the linearization types of its value and argument categories. To achieve
|
|
||||||
the division of the resource grammar to several parallel phrase category modules,
|
|
||||||
what we need is an underlying definition of the linearization types. This
|
|
||||||
definition is given as the implementation of
|
|
||||||
</P>
|
|
||||||
<UL>
|
|
||||||
<LI><CODE>Cat</CODE>: syntactic categories of the resource grammar
|
|
||||||
</UL>
|
|
||||||
|
|
||||||
<P>
|
|
||||||
Any resource grammar implementation has first to agree on how to implement
|
|
||||||
<CODE>Cat</CODE>. Luckily enough, even this can be done incrementally: you
|
|
||||||
can skip the <CODE>lincat</CODE> definition of a category and use the default
|
|
||||||
<CODE>{s : Str}</CODE> until you need to change it to something else. In
|
|
||||||
English, for instance, many categories do have this linearization type.
|
|
||||||
</P>
|
|
||||||
<A NAME="toc5"></A>
|
|
||||||
<H3>Lexical modules</H3>
|
|
||||||
<P>
|
|
||||||
What is lexical and what is syntactic is not as clear-cut in GF as in
|
|
||||||
some other grammar formalisms. Logically, lexical means atom, i.e. a
|
|
||||||
<CODE>fun</CODE> with no arguments. Linguistically, one may add to this
|
|
||||||
that the <CODE>lin</CODE> consists of only one token (or of a table whose values
|
|
||||||
are single tokens). Even in the restricted lexicon included in the resource
|
|
||||||
API, the latter rule is sometimes violated in some languages. For instance,
|
|
||||||
<CODE>Structural.both7and_DConj</CODE> is an atom, but its linearization is
|
|
||||||
two words e.g. <I>both - and</I>.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
Another characterization of lexical is that lexical units can be added
|
|
||||||
almost <I>ad libitum</I>, and they cannot be defined in terms of already
|
|
||||||
given rules. The lexical modules of the resource API are thus more like
|
|
||||||
samples than complete lists. There are two such modules:
|
|
||||||
</P>
|
|
||||||
<UL>
|
|
||||||
<LI><CODE>Structural</CODE>: structural words (determiners, conjunctions,...)
|
|
||||||
<LI><CODE>Lexicon</CODE>: basic everyday content words (nouns, verbs,...)
|
|
||||||
</UL>
|
|
||||||
|
|
||||||
<P>
|
|
||||||
The module <CODE>Structural</CODE> aims for completeness, and is likely to
|
|
||||||
be extended in future releases of the resource. The module <CODE>Lexicon</CODE>
|
|
||||||
gives a "random" list of words, which enables testing the syntax.
|
|
||||||
It also provides a check list for morphology, since those words are likely to include
|
|
||||||
most morphological patterns of the language.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
In the case of <CODE>Lexicon</CODE> it may come out clearer than anywhere else
|
|
||||||
in the API that it is impossible to give exact translation equivalents in
|
|
||||||
different languages on the level of a resource grammar. This is no problem,
|
|
||||||
since application grammars can use the resource in different ways for
|
|
||||||
different languages.
|
|
||||||
</P>
|
|
||||||
<A NAME="toc6"></A>
|
|
||||||
<H2>Language-dependent syntax modules</H2>
|
|
||||||
<P>
|
|
||||||
In addition to the common API, there is room for language-dependent extensions
|
|
||||||
of the resource. The top level of each language looks as follows (with German
|
|
||||||
as example):
|
|
||||||
</P>
|
|
||||||
<PRE>
|
|
||||||
abstract AllGerAbs = Lang, ExtraGerAbs, IrregGerAbs
|
|
||||||
</PRE>
|
|
||||||
<P>
|
|
||||||
where <CODE>ExtraGerAbs</CODE> is a collection of syntactic structures specific to German,
|
|
||||||
and <CODE>IrregGerAbs</CODE> is a dictionary of irregular words of German
|
|
||||||
(at the moment, just verbs). Each of these language-specific grammars has
|
|
||||||
the potential to grow into a full-scale grammar of the language. These grammars
|
|
||||||
can also be used as libraries, but the possibility of using functors is lost.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
To give a better overview of language-specific structures,
|
|
||||||
modules like <CODE>ExtraGerAbs</CODE>
|
|
||||||
are built from a language-independent module <CODE>ExtraAbs</CODE>
|
|
||||||
by restricted inheritance:
|
|
||||||
</P>
|
|
||||||
<PRE>
|
|
||||||
abstract ExtraGerAbs = Extra [f,g,...]
|
|
||||||
</PRE>
|
|
||||||
<P>
|
|
||||||
Thus any category and function in <CODE>Extra</CODE> may be shared by a subset of all
|
|
||||||
languages. One can see this set-up as a matrix, which tells
|
|
||||||
what <CODE>Extra</CODE> structures
|
|
||||||
are implemented in what languages. For the common API in <CODE>Grammar</CODE>, the matrix
|
|
||||||
is filled with 1's (everything is implemented in every language).
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
In a minimal resource grammar implementation, the language-dependent
|
|
||||||
extensions are just empty modules, but it is good to provide them for
|
|
||||||
the sake of uniformity.
|
|
||||||
</P>
|
|
||||||
<A NAME="toc7"></A>
|
|
||||||
<H3>The present-tense fragment</H3>
|
|
||||||
<P>
|
|
||||||
Some lines in the resource library are suffixed with the comment
|
|
||||||
</P>
|
|
||||||
<PRE>
|
|
||||||
--# notpresent
|
|
||||||
</PRE>
|
|
||||||
<P>
|
|
||||||
which is used by a preprocessor to exclude those lines from
|
|
||||||
a reduced version of the full resource. This present-tense-only
|
|
||||||
version is useful for applications in most technical text, since
|
|
||||||
it reduces the grammar size and compilation time. It can also
|
|
||||||
be useful to exclude those lines in a first version of resource
|
|
||||||
implementation. To compile a grammar with present-tense-only, use
|
|
||||||
</P>
|
|
||||||
<PRE>
|
|
||||||
make Present
|
|
||||||
</PRE>
|
|
||||||
<P>
|
|
||||||
with <CODE>resource/Makefile</CODE>.
|
|
||||||
</P>
|
|
||||||
<A NAME="toc8"></A>
|
|
||||||
<H2>Phases of the work</H2>
|
|
||||||
<A NAME="toc9"></A>
|
|
||||||
<H3>Putting up a directory</H3>
|
|
||||||
<P>
|
|
||||||
Unless you are writing an instance of a parametrized implementation
|
|
||||||
(Romance or Scandinavian), which will be covered later, the
|
|
||||||
simplest way is to follow roughly the following procedure. Assume you
|
|
||||||
are building a grammar for the German language. Here are the first steps,
|
|
||||||
which we actually followed ourselves when building the German implementation
|
|
||||||
of resource v. 1.0 on Ubuntu Linux. We have slightly modified them to
|
|
||||||
match resource v. 1.5 and GF v. 3.0.
|
|
||||||
</P>
|
|
||||||
<OL>
|
|
||||||
<LI>Create a sister directory for <CODE>GF/lib/resource/english</CODE>, named
|
|
||||||
<CODE>german</CODE>.
|
|
||||||
<PRE>
|
|
||||||
cd GF/lib/resource/
|
|
||||||
mkdir german
|
|
||||||
cd german
|
|
||||||
</PRE>
|
|
||||||
<P></P>
|
|
||||||
<LI>Check out the
|
|
||||||
<A HREF="http://www.w3.org/WAI/ER/IG/ert/iso639.htm">ISO 639 3-letter language code</A>
|
|
||||||
for German: both <CODE>Ger</CODE> and <CODE>Deu</CODE> are given, and we pick <CODE>Ger</CODE>.
|
|
||||||
(We use the 3-letter codes rather than the more common 2-letter codes,
|
|
||||||
since they will suffice for many more languages!)
|
|
||||||
<P></P>
|
|
||||||
<LI>Copy the <CODE>*Eng.gf</CODE> files from <CODE>english</CODE> to <CODE>german</CODE>,
|
|
||||||
and rename them:
|
|
||||||
<PRE>
|
|
||||||
cp ../english/*Eng.gf .
|
|
||||||
rename 's/Eng/Ger/' *Eng.gf
|
|
||||||
</PRE>
|
|
||||||
If you don't have the <CODE>rename</CODE> command, you can use a bash script with <CODE>mv</CODE>.
|
|
||||||
</OL>
|
|
||||||
|
|
||||||
<OL>
|
|
||||||
<LI>Change the <CODE>Eng</CODE> module references to <CODE>Ger</CODE> references
|
|
||||||
in all files:
|
|
||||||
<PRE>
|
|
||||||
sed -i 's/English/German/g' *Ger.gf
|
|
||||||
sed -i 's/Eng/Ger/g' *Ger.gf
|
|
||||||
</PRE>
|
|
||||||
The first line prevents changing the word <CODE>English</CODE>, which appears
|
|
||||||
here and there in comments, to <CODE>Gerlish</CODE>. The <CODE>sed</CODE> command syntax
|
|
||||||
may vary depending on your operating system.
|
|
||||||
<P></P>
|
|
||||||
<LI>This may of course change unwanted occurrences of the
|
|
||||||
string <CODE>Eng</CODE> - verify this by
|
|
||||||
<PRE>
|
|
||||||
grep Ger *.gf
|
|
||||||
</PRE>
|
|
||||||
But you will have to make lots of manual changes in all files anyway!
|
|
||||||
<P></P>
|
|
||||||
<LI>Comment out the contents of these files:
|
|
||||||
<PRE>
|
|
||||||
sed -i 's/^/--/' *Ger.gf
|
|
||||||
</PRE>
|
|
||||||
This will give you a set of templates out of which the grammar
|
|
||||||
will grow as you uncomment and modify the files rule by rule.
|
|
||||||
<P></P>
|
|
||||||
<LI>In all <CODE>.gf</CODE> files, uncomment the module headers and brackets,
|
|
||||||
leaving the module bodies commented. Unfortunately, there is no
|
|
||||||
simple way to do this automatically (or to avoid commenting these
|
|
||||||
lines in the previous step) - but uncommenting the first
|
|
||||||
and the last lines will actually do the job for many of the files.
|
|
||||||
<P></P>
|
|
||||||
<LI>Uncomment the contents of the main grammar file:
|
|
||||||
<PRE>
|
|
||||||
sed -i 's/^--//' LangGer.gf
|
|
||||||
</PRE>
|
|
||||||
<P></P>
|
|
||||||
<LI>Now you can open the grammar <CODE>LangGer</CODE> in GF:
|
|
||||||
<PRE>
|
|
||||||
gf LangGer.gf
|
|
||||||
</PRE>
|
|
||||||
You will get lots of warnings on missing rules, but the grammar will compile.
|
|
||||||
<P></P>
|
|
||||||
<LI>At all the following steps you will now have a valid, but incomplete
|
|
||||||
GF grammar. The GF command
|
|
||||||
<PRE>
|
|
||||||
pg -missing
|
|
||||||
</PRE>
|
|
||||||
tells you what exactly is missing.
|
|
||||||
</OL>
|
|
||||||
|
|
||||||
<P>
|
|
||||||
Here is the module structure of <CODE>LangGer</CODE>. It has been simplified by leaving out
|
|
||||||
the majority of the phrase category modules. Each of them has the same dependencies
|
|
||||||
as <CODE>VerbGer</CODE>, whose complete dependencies are shown as an example.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
<IMG ALIGN="middle" SRC="German.png" BORDER="0" ALT="">
|
|
||||||
</P>
|
|
||||||
<A NAME="toc10"></A>
|
|
||||||
<H3>Direction of work</H3>
|
|
||||||
<P>
|
|
||||||
The real work starts now. There are many ways to proceed, the most obvious ones being
|
|
||||||
</P>
|
|
||||||
<UL>
|
|
||||||
<LI>Top-down: start from the module <CODE>Phrase</CODE> and go down to <CODE>Sentence</CODE>, then
|
|
||||||
<CODE>Verb</CODE>, <CODE>Noun</CODE>, and in the end <CODE>Lexicon</CODE>. In this way, you are all the time
|
|
||||||
building complete phrases, and adding more content to them as you proceed.
|
|
||||||
<B>This approach is not recommended</B>. It is impossible to test the rules if
|
|
||||||
you have no words to apply the constructions to.
|
|
||||||
<P></P>
|
|
||||||
<LI>Bottom-up: set as your first goal to implement <CODE>Lexicon</CODE>. To this end, you
|
|
||||||
need to write <CODE>ParadigmsGer</CODE>, which in turn needs parts of
|
|
||||||
<CODE>MorphoGer</CODE> and <CODE>ResGer</CODE>.
|
|
||||||
<B>This approach is not recommended</B>. You can get stuck in details of
|
|
||||||
morphology such as irregular words, and you don't have enough grasp of
|
|
||||||
the type system to decide what forms to cover in morphology.
|
|
||||||
</UL>
|
|
||||||
|
|
||||||
<P>
|
|
||||||
The practical working direction is thus a saw-like motion between the morphological
|
|
||||||
and top-level modules. Here is a possible course of the work that gives enough
|
|
||||||
test data and enough general view at any point:
|
|
||||||
</P>
|
|
||||||
<OL>
|
|
||||||
<LI>Define <CODE>Cat.N</CODE> and the required parameter types in <CODE>ResGer</CODE>. As we define
|
|
||||||
<PRE>
|
|
||||||
lincat N = {s : Number => Case => Str ; g : Gender} ;
|
|
||||||
</PRE>
|
|
||||||
we need the parameter types <CODE>Number</CODE>, <CODE>Case</CODE>, and <CODE>Gender</CODE>. The definition
|
|
||||||
of <CODE>Number</CODE> in <A HREF="../lib/resource/common/ParamX.gf"><CODE>common/ParamX</CODE></A>
|
|
||||||
works for German, so we
|
|
||||||
use it and just define <CODE>Case</CODE> and <CODE>Gender</CODE> in <CODE>ResGer</CODE>.
|
|
||||||
<P></P>
|
|
||||||
<LI>Define some cases of <CODE>mkN</CODE> in <CODE>ParadigmsGer</CODE>. In this way you can
|
|
||||||
already implement a huge number of nouns correctly in <CODE>LexiconGer</CODE>. Actually
|
|
||||||
just adding the worst-case instance of <CODE>mkN</CODE> (the one taking the most
|
|
||||||
arguments) should suffice for every noun - but,
|
|
||||||
since it is tedious to use, you
|
|
||||||
might proceed to the next step before returning to morphology and defining the
|
|
||||||
real work horse, <CODE>mkN</CODE> taking two forms and a gender.
|
|
||||||
<P></P>
|
|
||||||
<LI>While doing this, you may want to test the resource independently. Do this by
|
|
||||||
starting the GF shell in the <CODE>resource</CODE> directory, by the commands
|
|
||||||
<PRE>
|
|
||||||
> i -retain german/ParadigmsGer
|
|
||||||
> cc -table mkN "Kirche"
|
|
||||||
</PRE>
|
|
||||||
<P></P>
|
|
||||||
<LI>Proceed to determiners and pronouns in
|
|
||||||
<CODE>NounGer</CODE> (<CODE>DetCN UsePron DetQuant NumSg DefArt IndefArt UseN</CODE>) and
|
|
||||||
<CODE>StructuralGer</CODE> (<CODE>i_Pron this_Quant</CODE>). You also need some categories and
|
|
||||||
parameter types. At this point, it is maybe not possible to find out the final
|
|
||||||
linearization types of <CODE>CN</CODE>, <CODE>NP</CODE>, <CODE>Det</CODE>, and <CODE>Quant</CODE>, but at least you should
|
|
||||||
be able to correctly inflect noun phrases such as <I>every airplane</I>:
|
|
||||||
<PRE>
|
|
||||||
> i german/LangGer.gf
|
|
||||||
> l -table DetCN every_Det (UseN airplane_N)
|
|
||||||
|
|
||||||
Nom: jedes Flugzeug
|
|
||||||
Acc: jedes Flugzeug
|
|
||||||
Dat: jedem Flugzeug
|
|
||||||
Gen: jedes Flugzeugs
|
|
||||||
</PRE>
|
|
||||||
<P></P>
|
|
||||||
<LI>Proceed to verbs: define <CODE>CatGer.V</CODE>, <CODE>ResGer.VForm</CODE>, and
|
|
||||||
<CODE>ParadigmsGer.mkV</CODE>. You may choose to exclude <CODE>notpresent</CODE>
|
|
||||||
cases at this point. But anyway, you will be able to inflect a good
|
|
||||||
number of verbs in <CODE>Lexicon</CODE>, such as
|
|
||||||
<CODE>live_V</CODE> (<CODE>mkV "leben"</CODE>).
|
|
||||||
<P></P>
|
|
||||||
<LI>Now you can soon form your first sentences: define <CODE>VP</CODE> and
|
|
||||||
<CODE>Cl</CODE> in <CODE>CatGer</CODE>, <CODE>VerbGer.UseV</CODE>, and <CODE>SentenceGer.PredVP</CODE>.
|
|
||||||
Even if you have excluded the tenses, you will be able to produce
|
|
||||||
<PRE>
|
|
||||||
> i -preproc=./mkPresent german/LangGer.gf
|
|
||||||
> l -table PredVP (UsePron i_Pron) (UseV live_V)
|
|
||||||
|
|
||||||
Pres Simul Pos Main: ich lebe
|
|
||||||
Pres Simul Pos Inv: lebe ich
|
|
||||||
Pres Simul Pos Sub: ich lebe
|
|
||||||
Pres Simul Neg Main: ich lebe nicht
|
|
||||||
Pres Simul Neg Inv: lebe ich nicht
|
|
||||||
Pres Simul Neg Sub: ich nicht lebe
|
|
||||||
</PRE>
|
|
||||||
You should also be able to parse:
|
|
||||||
<PRE>
|
|
||||||
> p -cat=Cl "ich lebe"
|
|
||||||
PredVP (UsePron i_Pron) (UseV live_V)
|
|
||||||
</PRE>
|
|
||||||
<P></P>
|
|
||||||
<LI>Transitive verbs
|
|
||||||
(<CODE>CatGer.V2 CatGer.VPSlash ParadigmsGer.mkV2 VerbGer.ComplSlash VerbGer.SlashV2a</CODE>)
|
|
||||||
are a natural next step, so that you can
|
|
||||||
produce <CODE>ich liebe dich</CODE> ("I love you").
|
|
||||||
<P></P>
|
|
||||||
<LI>Adjectives (<CODE>CatGer.A ParadigmsGer.mkA NounGer.AdjCN AdjectiveGer.PositA</CODE>)
|
|
||||||
will force you to think about strong and weak declensions, so that you can
|
|
||||||
correctly inflect <I>mein neuer Wagen, dieser neue Wagen</I>
|
|
||||||
("my new car, this new car").
|
|
||||||
<P></P>
|
|
||||||
<LI>Once you have implemented the set
|
|
||||||
(<CODE>Noun.DetCN Noun.AdjCN Verb.UseV Verb.ComplSlash Verb.SlashV2a Sentence.PredVP</CODE>),
|
|
||||||
you have overcome most of the difficulties. You know roughly what parameters
|
|
||||||
and dependences there are in your language, and you can now proceed very
|
|
||||||
much in the order you please.
|
|
||||||
</OL>
|
|
||||||
|
|
||||||
<A NAME="toc11"></A>
|
|
||||||
<H3>The develop-test cycle</H3>
|
|
||||||
<P>
|
|
||||||
The following develop-test cycle will
|
|
||||||
be applied most of the time, both in the first steps described above
|
|
||||||
and in later steps where you are more on your own.
|
|
||||||
</P>
|
|
||||||
<OL>
|
|
||||||
<LI>Select a phrase category module, e.g. <CODE>NounGer</CODE>, and uncomment some
|
|
||||||
linearization rules (for instance, <CODE>DetCN</CODE>, as above).
|
|
||||||
<P></P>
|
|
||||||
<LI>Write down some German examples of this rule, for instance translations
|
|
||||||
of "the dog", "the house", "the big house", etc. Write these in all their
|
|
||||||
different forms (two numbers and four cases).
|
|
||||||
<P></P>
|
|
||||||
<LI>Think about the categories involved (<CODE>CN, NP, N, Det</CODE>) and the
|
|
||||||
variations they have. Encode this in the lincats of <CODE>CatGer</CODE>.
|
|
||||||
You may have to define some new parameter types in <CODE>ResGer</CODE>.
|
|
||||||
<P></P>
|
|
||||||
<LI>To be able to test the construction,
|
|
||||||
define some words you need to instantiate it
|
|
||||||
in <CODE>LexiconGer</CODE>. You will also need some regular inflection patterns
|
|
||||||
in <CODE>ParadigmsGer</CODE>.
|
|
||||||
<P></P>
|
|
||||||
<LI>Test by parsing, linearization,
|
|
||||||
and random generation. In particular, linearization to a table should
|
|
||||||
be used so that you see all forms produced; the <CODE>treebank</CODE> option
|
|
||||||
preserves the tree
|
|
||||||
<PRE>
|
|
||||||
> gr -cat=NP -number=20 | l -table -treebank
|
|
||||||
</PRE>
|
|
||||||
<P></P>
|
|
||||||
<LI>Save some tree-linearization pairs for later regression testing. You can save
|
|
||||||
a gold standard treebank and use the Unix <CODE>diff</CODE> command to compare later
|
|
||||||
linearizations produced from the same list of trees. If you save the trees
|
|
||||||
in a file <CODE>trees</CODE>, you can do as follows:
|
|
||||||
<PRE>
|
|
||||||
> rf -file=trees -tree -lines | l -table -treebank | wf -file=treebank
|
|
||||||
</PRE>
|
|
||||||
<P></P>
|
|
||||||
<LI>A file with trees testing all resource functions is included in the resource,
|
|
||||||
entitled <CODE>resource/exx-resource.gft</CODE>. A treebank can be created from this by
|
|
||||||
the Unix command
|
|
||||||
<PRE>
|
|
||||||
% runghc Make.hs test langs=Ger
|
|
||||||
</PRE>
|
|
||||||
</OL>
|
|
||||||
|
|
||||||
<P>
|
|
||||||
You are likely to run this cycle a few times for each linearization rule
|
|
||||||
you implement, and some hundreds of times altogether. There are roughly
|
|
||||||
70 <CODE>cat</CODE>s and
|
|
||||||
600 <CODE>funs</CODE> in <CODE>Lang</CODE> at the moment; 170 of the <CODE>funs</CODE> are outside the two
|
|
||||||
lexicon modules.
|
|
||||||
</P>
|
|
||||||
<A NAME="toc12"></A>
|
|
||||||
<H3>Auxiliary modules</H3>
|
|
||||||
<P>
|
|
||||||
These auxiliary <CODE>resource</CODE> modules will be written by you.
|
|
||||||
</P>
|
|
||||||
<UL>
|
|
||||||
<LI><CODE>ResGer</CODE>: parameter types and auxiliary operations
|
|
||||||
(a resource for the resource grammar!)
|
|
||||||
<LI><CODE>ParadigmsGer</CODE>: complete inflection engine and most important regular paradigms
|
|
||||||
<LI><CODE>MorphoGer</CODE>: auxiliaries for <CODE>ParadigmsGer</CODE> and <CODE>StructuralGer</CODE>. This need
|
|
||||||
not be separate from <CODE>ResGer</CODE>.
|
|
||||||
</UL>
|
|
||||||
|
|
||||||
<P>
|
|
||||||
These modules are language-independent and provided by the existing resource
|
|
||||||
package.
|
|
||||||
</P>
|
|
||||||
<UL>
|
|
||||||
<LI><CODE>ParamX</CODE>: parameter types used in many languages
|
|
||||||
<LI><CODE>CommonX</CODE>: implementation of language-uniform categories
|
|
||||||
such as <CODE>Text</CODE> and <CODE>Phr</CODE>, as well as of
|
|
||||||
the logical tense, anteriority, and polarity parameters
|
|
||||||
<LI><CODE>Coordination</CODE>: operations to deal with lists and coordination
|
|
||||||
<LI><CODE>Prelude</CODE>: general-purpose operations on strings, records,
|
|
||||||
truth values, etc.
|
|
||||||
<LI><CODE>Predef</CODE>: general-purpose operations with hard-coded definitions
|
|
||||||
</UL>
|
|
||||||
|
|
||||||
<P>
|
|
||||||
An important decision is what rules to implement in terms of operations in
|
|
||||||
<CODE>ResGer</CODE>. The <B>golden rule of functional programming</B> says:
|
|
||||||
</P>
|
|
||||||
<UL>
|
|
||||||
<LI><I>Whenever you find yourself programming by copy and paste, write a function instead!</I>.
|
|
||||||
</UL>
|
|
||||||
|
|
||||||
<P>
|
|
||||||
This rule suggests that an operation should be created if it is to be
|
|
||||||
used at least twice. At the same time, a sound principle of <B>vicinity</B> says:
|
|
||||||
</P>
|
|
||||||
<UL>
|
|
||||||
<LI><I>It should not require too much browsing to understand what a piece of code does.</I>
|
|
||||||
</UL>
|
|
||||||
|
|
||||||
<P>
|
|
||||||
From these two principles, we have derived the following practice:
|
|
||||||
</P>
|
|
||||||
<UL>
|
|
||||||
<LI>If an operation is needed <I>in two different modules</I>,
|
|
||||||
it should be created as an <CODE>oper</CODE> in <CODE>ResGer</CODE>. An example is <CODE>mkClause</CODE>,
|
|
||||||
used in <CODE>Sentence</CODE>, <CODE>Question</CODE>, and <CODE>Relative</CODE>.
|
|
||||||
<LI>If an operation is needed <I>twice in the same module</I>, but never
|
|
||||||
outside, it should be created in the same module. Many examples are
|
|
||||||
found in <CODE>Numerals</CODE>.
|
|
||||||
<LI>If an operation is needed <I>twice in the same judgement</I>, but never
|
|
||||||
outside, it should be created by a <CODE>let</CODE> definition.
|
|
||||||
<LI>If an operation is only needed once, it should not be created as an <CODE>oper</CODE>,
|
|
||||||
but rather inlined. However, a <CODE>let</CODE> definition may well be in place just
|
|
||||||
to make the code readable.
|
|
||||||
Most functions in phrase category modules
|
|
||||||
are implemented in this way.
|
|
||||||
</UL>
|
|
||||||
|
|
||||||
<P>
|
|
||||||
This discipline is very different from the one followed in early
|
|
||||||
versions of the library (up to 0.9). We then valued the principle of
|
|
||||||
abstraction more than vicinity, creating layers of abstraction for
|
|
||||||
almost everything. This led in practice to the duplication of almost
|
|
||||||
all code on the <CODE>lin</CODE> and <CODE>oper</CODE> levels, and made the code
|
|
||||||
hard to understand and maintain.
|
|
||||||
</P>
|
|
||||||
<A NAME="toc13"></A>
|
|
||||||
<H3>Morphology and lexicon</H3>
|
|
||||||
<P>
|
|
||||||
The paradigms needed to implement
|
|
||||||
<CODE>LexiconGer</CODE> are defined in
|
|
||||||
<CODE>ParadigmsGer</CODE>.
|
|
||||||
This module provides high-level ways to define the linearization of
|
|
||||||
lexical items, of categories <CODE>N, A, V</CODE> and their complement-taking
|
|
||||||
variants.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
For ease of use, the <CODE>Paradigms</CODE> modules follow a certain
|
|
||||||
naming convention. Thus they provide, for each lexical category, such as <CODE>N</CODE>,
|
|
||||||
the overloaded functions, such as <CODE>mkN</CODE>, with the following cases:
|
|
||||||
</P>
|
|
||||||
<UL>
|
|
||||||
<LI>the worst-case construction of <CODE>N</CODE>. Its type signature
|
|
||||||
has the form
|
|
||||||
<PRE>
|
|
||||||
mkN : Str -> ... -> Str -> P -> ... -> Q -> N
|
|
||||||
</PRE>
|
|
||||||
with as many string and parameter arguments as can ever be needed to
|
|
||||||
construct an <CODE>N</CODE>.
|
|
||||||
<LI>the most regular cases, with just one string argument:
|
|
||||||
<PRE>
|
|
||||||
mkN : Str -> N
|
|
||||||
</PRE>
|
|
||||||
<LI>A language-dependent (small) set of functions to handle mild irregularities
|
|
||||||
and common exceptions.
|
|
||||||
</UL>
|
|
||||||
|
|
||||||
<P>
|
|
||||||
For the complement-taking variants, such as <CODE>V2</CODE>, we provide
|
|
||||||
</P>
|
|
||||||
<UL>
|
|
||||||
<LI>a case that takes a <CODE>V</CODE> and all necessary arguments, such
|
|
||||||
as case and preposition:
|
|
||||||
<PRE>
|
|
||||||
mkV2 : V -> Case -> Str -> V2 ;
|
|
||||||
</PRE>
|
|
||||||
<LI>a case that takes a <CODE>Str</CODE> and produces a transitive verb with the direct
|
|
||||||
object case:
|
|
||||||
<PRE>
|
|
||||||
mkV2 : Str -> V2 ;
|
|
||||||
</PRE>
|
|
||||||
<LI>A language-dependent (small) set of functions to handle common special cases,
|
|
||||||
such as transitive verbs that are not regular:
|
|
||||||
<PRE>
|
|
||||||
mkV2 : V -> V2 ;
|
|
||||||
</PRE>
|
|
||||||
</UL>
|
|
||||||
|
|
||||||
<P>
|
|
||||||
The golden rule for the design of paradigms is that
|
|
||||||
</P>
|
|
||||||
<UL>
|
|
||||||
<LI><I>The user of the library will only need function applications with constants and strings, never any records or tables.</I>
|
|
||||||
</UL>
|
|
||||||
|
|
||||||
<P>
|
|
||||||
The discipline of data abstraction moreover requires that the user of the resource
|
|
||||||
is not given access to parameter constructors, but only to constants that denote
|
|
||||||
them. This gives the resource grammarian the freedom to change the underlying
|
|
||||||
data representation if needed. It means that the <CODE>ParadigmsGer</CODE> module has
|
|
||||||
to define constants for those parameter types and constructors that
|
|
||||||
the application grammarian may need to use, e.g.
|
|
||||||
</P>
|
|
||||||
<PRE>
|
|
||||||
oper
|
|
||||||
Case : Type ;
|
|
||||||
nominative, accusative, genitive, dative : Case ;
|
|
||||||
</PRE>
|
|
||||||
<P>
|
|
||||||
These constants are defined in terms of parameter types and constructors
|
|
||||||
in <CODE>ResGer</CODE> and <CODE>MorphoGer</CODE>, which modules are not
|
|
||||||
visible to the application grammarian.
|
|
||||||
</P>
|
|
||||||
<A NAME="toc14"></A>
|
|
||||||
<H3>Lock fields</H3>
|
|
||||||
<P>
|
|
||||||
An important difference between <CODE>MorphoGer</CODE> and
|
|
||||||
<CODE>ParadigmsGer</CODE> is that the former uses "raw" record types
|
|
||||||
for word classes, whereas the latter uses category symbols defined in
|
|
||||||
<CODE>CatGer</CODE>. When these category symbols are used to denote
|
|
||||||
record types in a resource module, such as <CODE>ParadigmsGer</CODE>,
|
|
||||||
a <B>lock field</B> is added to the record, so that categories
|
|
||||||
with the same implementation are not confused with each other.
|
|
||||||
(This is inspired by the <CODE>newtype</CODE> discipline in Haskell.)
|
|
||||||
For instance, the lincats of adverbs and conjunctions are the same
|
|
||||||
in <CODE>CommonX</CODE> (and therefore in <CODE>CatGer</CODE>, which inherits it):
|
|
||||||
</P>
|
|
||||||
<PRE>
|
|
||||||
lincat Adv = {s : Str} ;
|
|
||||||
lincat Conj = {s : Str} ;
|
|
||||||
</PRE>
|
|
||||||
<P>
|
|
||||||
But when these category symbols are used to denote their linearization
|
|
||||||
types in a resource module, these definitions are translated to
|
|
||||||
</P>
|
|
||||||
<PRE>
|
|
||||||
oper Adv : Type = {s : Str ; lock_Adv : {}} ;
|
|
||||||
oper Conj : Type = {s : Str ; lock_Conj : {}} ;
|
|
||||||
</PRE>
|
|
||||||
<P>
|
|
||||||
In this way, the user of a resource grammar cannot confuse adverbs with
|
|
||||||
conjunctions. In other words, the lock fields force the type checker
|
|
||||||
to function as a grammaticality checker.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
When the resource grammar is <CODE>open</CODE>ed in an application grammar, the
|
|
||||||
lock fields are never seen (except possibly in type error messages),
|
|
||||||
and the application grammarian should never write them herself. If she
|
|
||||||
has to do this, it is a sign that the resource grammar is incomplete, and
|
|
||||||
the proper way to proceed is to fix the resource grammar.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
The resource grammarian has to provide the dummy lock field values
|
|
||||||
in her hidden definitions of constants in <CODE>Paradigms</CODE>. For instance,
|
|
||||||
</P>
|
|
||||||
<PRE>
|
|
||||||
mkAdv : Str -> Adv ;
|
|
||||||
-- mkAdv s = {s = s ; lock_Adv = <>} ;
|
|
||||||
</PRE>
|
|
||||||
<P></P>
|
|
||||||
<A NAME="toc15"></A>
|
|
||||||
<H3>Lexicon construction</H3>
|
|
||||||
<P>
|
|
||||||
The lexicon belonging to <CODE>LangGer</CODE> consists of two modules:
|
|
||||||
</P>
|
|
||||||
<UL>
|
|
||||||
<LI><CODE>StructuralGer</CODE>, structural words, built by using both
|
|
||||||
<CODE>ParadigmsGer</CODE> and <CODE>MorphoGer</CODE>.
|
|
||||||
<LI><CODE>LexiconGer</CODE>, content words, built by using <CODE>ParadigmsGer</CODE> only.
|
|
||||||
</UL>
|
|
||||||
|
|
||||||
<P>
|
|
||||||
The reason why <CODE>MorphoGer</CODE> has to be used in <CODE>StructuralGer</CODE>
|
|
||||||
is that <CODE>ParadigmsGer</CODE> does not contain constructors for closed
|
|
||||||
word classes such as pronouns and determiners. The reason why we
|
|
||||||
recommend <CODE>ParadigmsGer</CODE> for building <CODE>LexiconGer</CODE> is that
|
|
||||||
the coverage of the paradigms gets thereby tested and that the
|
|
||||||
use of the paradigms in <CODE>LexiconGer</CODE> gives a good set of examples for
|
|
||||||
those who want to build new lexica.
|
|
||||||
</P>
|
|
||||||
<A NAME="toc16"></A>
|
|
||||||
<H2>Lexicon extension</H2>
|
|
||||||
<A NAME="toc17"></A>
|
|
||||||
<H3>The irregularity lexicon</H3>
|
|
||||||
<P>
|
|
||||||
It is useful in most languages to provide a separate module of irregular
|
|
||||||
verbs and other words which are difficult for a lexicographer
|
|
||||||
to handle. There are usually a limited number of such words - a
|
|
||||||
few hundred perhaps. Building such a lexicon separately also
|
|
||||||
makes it less important to cover <I>everything</I> by the
|
|
||||||
worst-case variants of the paradigms <CODE>mkV</CODE> etc.
|
|
||||||
</P>
|
|
||||||
<A NAME="toc18"></A>
|
|
||||||
<H3>Lexicon extraction from a word list</H3>
|
|
||||||
<P>
|
|
||||||
You can often find resources such as lists of
|
|
||||||
irregular verbs on the internet. For instance, the
|
|
||||||
Irregular German Verb page
|
|
||||||
previously found in
|
|
||||||
<CODE>http://www.iee.et.tu-dresden.de/~wernerr/grammar/verben_dt.html</CODE>
|
|
||||||
gives a list of verbs in the
|
|
||||||
traditional tabular format, which begins as follows:
|
|
||||||
</P>
|
|
||||||
<PRE>
|
|
||||||
backen (du bäckst, er bäckt) backte [buk] gebacken
|
|
||||||
befehlen (du befiehlst, er befiehlt; befiehl!) befahl (beföhle; befähle) befohlen
|
|
||||||
beginnen begann (begönne; begänne) begonnen
|
|
||||||
beißen biß gebissen
|
|
||||||
</PRE>
|
|
||||||
<P>
|
|
||||||
All you have to do is to write a suitable verb paradigm
|
|
||||||
</P>
|
|
||||||
<PRE>
|
|
||||||
irregV : (x1,_,_,_,_,x6 : Str) -> V ;
|
|
||||||
</PRE>
|
|
||||||
<P>
|
|
||||||
and a Perl or Python or Haskell script that transforms
|
|
||||||
the table to
|
|
||||||
</P>
|
|
||||||
<PRE>
|
|
||||||
backen_V = irregV "backen" "bäckt" "back" "backte" "backte" "gebacken" ;
|
|
||||||
befehlen_V = irregV "befehlen" "befiehlt" "befiehl" "befahl" "beföhle" "befohlen" ;
|
|
||||||
</PRE>
|
|
||||||
<P></P>
|
|
||||||
<P>
|
|
||||||
When using ready-made word lists, you should think about
|
|
||||||
copyright issues. All resource grammar material should
|
|
||||||
be provided under GNU Lesser General Public License (LGPL).
|
|
||||||
</P>
|
|
||||||
<A NAME="toc19"></A>
|
|
||||||
<H3>Lexicon extraction from raw text data</H3>
|
|
||||||
<P>
|
|
||||||
This is a cheap technique to build a lexicon of thousands
|
|
||||||
of words, if text data is available in digital format.
|
|
||||||
See the <A HREF="http://www.cs.chalmers.se/~markus/extract/">Extract Homepage</A>
|
|
||||||
for details.
|
|
||||||
</P>
|
|
||||||
<A NAME="toc20"></A>
|
|
||||||
<H3>Bootstrapping with smart paradigms</H3>
|
|
||||||
<P>
|
|
||||||
This is another cheap technique, where you need as input a list of words with
|
|
||||||
part-of-speech marking. You initialize the lexicon by using the one-argument
|
|
||||||
<CODE>mkN</CODE> etc paradigms, and add forms to those words that do not come out right.
|
|
||||||
This procedure is described in the paper
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
A. Ranta.
|
|
||||||
How predictable is Finnish morphology? An experiment on lexicon construction.
|
|
||||||
In J. Nivre, M. Dahllöf and B. Megyesi (eds),
|
|
||||||
<I>Resourceful Language Technology: Festschrift in Honor of Anna Sågvall Hein</I>,
|
|
||||||
University of Uppsala,
|
|
||||||
2008.
|
|
||||||
Available from the <A HREF="http://publications.uu.se/abstract.xsql?dbid=8933">series homepage</A>
|
|
||||||
</P>
|
|
||||||
<A NAME="toc21"></A>
|
|
||||||
<H2>Extending the resource grammar API</H2>
|
|
||||||
<P>
|
|
||||||
Sooner or later it will happen that the resource grammar API
|
|
||||||
does not suffice for all applications. A common reason is
|
|
||||||
that it does not include idiomatic expressions in a given language.
|
|
||||||
The solution then is in the first place to build language-specific
|
|
||||||
extension modules, like <CODE>ExtraGer</CODE>.
|
|
||||||
</P>
|
|
||||||
<A NAME="toc22"></A>
|
|
||||||
<H2>Using parametrized modules</H2>
|
|
||||||
<A NAME="toc23"></A>
|
|
||||||
<H3>Writing an instance of parametrized resource grammar implementation</H3>
|
|
||||||
<P>
|
|
||||||
Above we have looked at how a resource implementation is built by
|
|
||||||
the copy and paste method (from English to German), that is, formally
|
|
||||||
speaking, from scratch. A more elegant solution available for
|
|
||||||
families of languages such as Romance and Scandinavian is to
|
|
||||||
use parametrized modules. The advantages are
|
|
||||||
</P>
|
|
||||||
<UL>
|
|
||||||
<LI>theoretical: linguistic generalizations and insights
|
|
||||||
<LI>practical: maintainability improves with fewer components
|
|
||||||
</UL>
|
|
||||||
|
|
||||||
<P>
|
|
||||||
Here is a set of
|
|
||||||
<A HREF="http://www.cs.chalmers.se/~aarne/geocal2006.pdf">slides</A>
|
|
||||||
on the topic.
|
|
||||||
</P>
|
|
||||||
<A NAME="toc24"></A>
|
|
||||||
<H3>Parametrizing a resource grammar implementation</H3>
|
|
||||||
<P>
|
|
||||||
This is the most demanding form of resource grammar writing.
|
|
||||||
We do <I>not</I> recommend the method of parametrizing from the
|
|
||||||
beginning: it is easier to have one language first implemented
|
|
||||||
in the conventional way and then add another language of the
|
|
||||||
same family by parametrization. This means that the copy and
|
|
||||||
paste method is still used, but this time the differences
|
|
||||||
are put into an <CODE>interface</CODE> module.
|
|
||||||
</P>
|
|
||||||
<A NAME="toc25"></A>
|
|
||||||
<H2>Character encoding and transliterations</H2>
|
|
||||||
<P>
|
|
||||||
This section is relevant for languages using a non-ASCII character set.
|
|
||||||
</P>
|
|
||||||
<A NAME="toc26"></A>
|
|
||||||
<H2>Coding conventions in GF</H2>
|
|
||||||
<P>
|
|
||||||
From version 3.0, GF follows a simple encoding convention:
|
|
||||||
</P>
|
|
||||||
<UL>
|
|
||||||
<LI>GF source files may follow any encoding, such as isolatin-1 or UTF-8;
|
|
||||||
the default is isolatin-1, and UTF8 must be indicated by the judgement
|
|
||||||
<PRE>
|
|
||||||
flags coding = utf8 ;
|
|
||||||
</PRE>
|
|
||||||
in each source module.
|
|
||||||
<LI>for internal processing, all characters are converted to 16-bit unicode,
|
|
||||||
as the first step of grammar compilation guided by the <CODE>coding</CODE> flag
|
|
||||||
<LI>as the last step of compilation, all characters are converted to UTF-8
|
|
||||||
<LI>thus, GF object files (<CODE>gfo</CODE>) and the Portable Grammar Format (<CODE>pgf</CODE>)
|
|
||||||
are in UTF-8
|
|
||||||
</UL>
|
|
||||||
|
|
||||||
<P>
|
|
||||||
Most current resource grammars use isolatin-1 in the source, but this does
|
|
||||||
not affect their use in parallel with grammars written in other encodings.
|
|
||||||
In fact, a grammar can be put together from modules using different codings.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
<B>Warning</B>. While string literals may contain any characters, identifiers
|
|
||||||
must be isolatin-1 letters (or digits, underscores, or dashes). This has to
|
|
||||||
do with the restrictions of the lexer tool that is used.
|
|
||||||
</P>
|
|
||||||
<A NAME="toc27"></A>
|
|
||||||
<H2>Transliterations</H2>
|
|
||||||
<P>
|
|
||||||
While UTF-8 is well supported by most web browsers, its use in terminals and
|
|
||||||
text editors may cause disappointment. Many grammarians therefore prefer to
|
|
||||||
use ASCII transliterations. GF 3.0beta2 provides the following built-in
|
|
||||||
transliterations:
|
|
||||||
</P>
|
|
||||||
<UL>
|
|
||||||
<LI>Arabic
|
|
||||||
<LI>Devanagari (Hindi)
|
|
||||||
<LI>Thai
|
|
||||||
</UL>
|
|
||||||
|
|
||||||
<P>
|
|
||||||
New transliterations can be defined in the GF source file
|
|
||||||
<A HREF="../src/GF/Text/Transliterations.hs"><CODE>GF/Text/Transliterations.hs</CODE></A>.
|
|
||||||
This file also gives instructions on how new ones are added.
|
|
||||||
</P>
|
|
||||||
|
|
||||||
<!-- html code generated by txt2tags 2.4 (http://txt2tags.sf.net) -->
|
|
||||||
<!-- cmdline: txt2tags -\-toc Resource-HOWTO.txt -->
|
|
||||||
</BODY></HTML>
|
|
||||||
@@ -1,827 +0,0 @@
|
|||||||
Resource grammar writing HOWTO
|
|
||||||
Author: Aarne Ranta <aarne (at) cs.chalmers.se>
|
|
||||||
Last update: %%date(%c)
|
|
||||||
|
|
||||||
% NOTE: this is a txt2tags file.
|
|
||||||
% Create an html file from this file using:
|
|
||||||
% txt2tags --toc -thtml Resource-HOWTO.txt
|
|
||||||
|
|
||||||
%!target:html
|
|
||||||
|
|
||||||
**History**
|
|
||||||
|
|
||||||
September 2008: updated for Version 1.5.
|
|
||||||
|
|
||||||
October 2007: updated for Version 1.2.
|
|
||||||
|
|
||||||
January 2006: first version.
|
|
||||||
|
|
||||||
|
|
||||||
The purpose of this document is to tell how to implement the GF
|
|
||||||
resource grammar API for a new language. We will //not// cover how
|
|
||||||
to use the resource grammar, nor how to change the API. But we
|
|
||||||
will give some hints on how to extend the API.
|
|
||||||
|
|
||||||
A manual for using the resource grammar is found in
|
|
||||||
|
|
||||||
[``www.cs.chalmers.se/Cs/Research/Language-technology/GF/lib/resource/doc/synopsis.html`` ../lib/resource/doc/synopsis.html].
|
|
||||||
|
|
||||||
A tutorial on GF, also introducing the idea of resource grammars, is found in
|
|
||||||
|
|
||||||
[``www.cs.chalmers.se/Cs/Research/Language-technology/GF/doc/gf-tutorial.html`` ./gf-tutorial.html].
|
|
||||||
|
|
||||||
This document concerns the API v. 1.5, while the current stable release is 1.4.
|
|
||||||
You can find the code for the stable release in
|
|
||||||
|
|
||||||
[``www.cs.chalmers.se/Cs/Research/Language-technology/GF/lib/resource/`` ../lib/resource]
|
|
||||||
|
|
||||||
and the next release in
|
|
||||||
|
|
||||||
[``www.cs.chalmers.se/Cs/Research/Language-technology/GF/next-lib/src/`` ../next-lib/src]
|
|
||||||
|
|
||||||
It is recommended to build new grammars to match the next release.
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
==The resource grammar structure==
|
|
||||||
|
|
||||||
The library is divided into a bunch of modules, whose dependencies
|
|
||||||
are given in the following figure.
|
|
||||||
|
|
||||||
[Syntax.png]
|
|
||||||
|
|
||||||
Modules of different kinds are distinguished as follows:
|
|
||||||
- solid contours: module seen by end users
|
|
||||||
- dashed contours: internal module
|
|
||||||
- ellipse: abstract/concrete pair of modules
|
|
||||||
- rectangle: resource or instance
|
|
||||||
- diamond: interface
|
|
||||||
|
|
||||||
|
|
||||||
Put in another way:
|
|
||||||
- solid rectangles and diamonds: user-accessible library API
|
|
||||||
- solid ellipses: user-accessible top-level grammar for parsing and linearization
|
|
||||||
- dashed contours: not visible to users
|
|
||||||
|
|
||||||
|
|
||||||
The dashed ellipses form the main parts of the implementation, on which the resource
|
|
||||||
grammar programmer has to work. She also has to work on the ``Paradigms``
|
|
||||||
module. The rest of the modules can be produced mechanically from corresponding
|
|
||||||
modules for other languages, by just changing the language codes appearing in
|
|
||||||
their module headers.
|
|
||||||
|
|
||||||
The module structure is rather flat: most modules are direct
|
|
||||||
parents of ``Grammar``. The idea
|
|
||||||
is that the implementors can concentrate on one linguistic aspect at a time, or
|
|
||||||
also distribute the work among several authors. The module ``Cat``
|
|
||||||
defines the "glue" that ties the aspects together - a type system
|
|
||||||
to which all the other modules conform, so that e.g. ``NP`` means
|
|
||||||
the same thing in those modules that use ``NP``s and those that
|
|
||||||
construct them.
|
|
||||||
|
|
||||||
|
|
||||||
===Library API modules===
|
|
||||||
|
|
||||||
For the user of the library, these modules are the most important ones.
|
|
||||||
In a typical application, it is enough to open ``Paradigms`` and ``Syntax``.
|
|
||||||
The module ``Try`` combines these two, making it possible to experiment
|
|
||||||
with combinations of syntactic and lexical constructors by using the
|
|
||||||
``cc`` command in the GF shell. Here are short explanations of each API module:
|
|
||||||
- ``Try``: the whole resource library for a language (``Paradigms``, ``Syntax``,
|
|
||||||
``Irreg``, and ``Extra``);
|
|
||||||
produced mechanically as a collection of modules
|
|
||||||
- ``Syntax``: language-independent categories, syntax functions, and structural words;
|
|
||||||
produced mechanically as a collection of modules
|
|
||||||
- ``Constructors``: language-independent syntax functions and structural words;
|
|
||||||
produced mechanically via functor instantiation
|
|
||||||
- ``Paradigms``: language-dependent morphological paradigms
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
===Phrase category modules===
|
|
||||||
|
|
||||||
The immediate parents of ``Grammar`` will be called **phrase category modules**,
|
|
||||||
since each of them concentrates on a particular phrase category (nouns, verbs,
|
|
||||||
adjectives, sentences,...). A phrase category module tells
|
|
||||||
//how to construct phrases in that category//. You will find out that
|
|
||||||
all functions in any of these modules have the same value type (or maybe
|
|
||||||
one of a small number of different types). Thus we have
|
|
||||||
- ``Noun``: construction of nouns and noun phrases
|
|
||||||
- ``Adjective``: construction of adjectival phrases
|
|
||||||
- ``Verb``: construction of verb phrases
|
|
||||||
- ``Adverb``: construction of adverbial phrases
|
|
||||||
- ``Numeral``: construction of cardinal and ordinal numerals
|
|
||||||
- ``Sentence``: construction of sentences and imperatives
|
|
||||||
- ``Question``: construction of questions
|
|
||||||
- ``Relative``: construction of relative clauses
|
|
||||||
- ``Conjunction``: coordination of phrases
|
|
||||||
- ``Phrase``: construction of the major units of text and speech
|
|
||||||
- ``Text``: construction of texts as sequences of phrases
|
|
||||||
- ``Idiom``: idiomatic expressions such as existentials
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
===Infrastructure modules===
|
|
||||||
|
|
||||||
Expressions of each phrase category are constructed in the corresponding
|
|
||||||
phrase category module. But their //use// mostly takes place in other modules.
|
|
||||||
For instance, noun phrases, which are constructed in ``Noun``, are
|
|
||||||
used as arguments of functions of almost all other phrase category modules.
|
|
||||||
How can we build all these modules independently of each other?
|
|
||||||
|
|
||||||
As usual in typeful programming, the //only// thing you need to know
|
|
||||||
about an object you use is its type. When writing a linearization rule
|
|
||||||
for a GF abstract syntax function, the only thing you need to know is
|
|
||||||
the linearization types of its value and argument categories. To achieve
|
|
||||||
the division of the resource grammar to several parallel phrase category modules,
|
|
||||||
what we need is an underlying definition of the linearization types. This
|
|
||||||
definition is given as the implementation of
|
|
||||||
- ``Cat``: syntactic categories of the resource grammar
|
|
||||||
|
|
||||||
|
|
||||||
Any resource grammar implementation has first to agree on how to implement
|
|
||||||
``Cat``. Luckily enough, even this can be done incrementally: you
|
|
||||||
can skip the ``lincat`` definition of a category and use the default
|
|
||||||
``{s : Str}`` until you need to change it to something else. In
|
|
||||||
English, for instance, many categories do have this linearization type.
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
===Lexical modules===
|
|
||||||
|
|
||||||
What is lexical and what is syntactic is not as clearcut in GF as in
|
|
||||||
some other grammar formalisms. Logically, lexical means atom, i.e. a
|
|
||||||
``fun`` with no arguments. Linguistically, one may add to this
|
|
||||||
that the ``lin`` consists of only one token (or of a table whose values
|
|
||||||
are single tokens). Even in the restricted lexicon included in the resource
|
|
||||||
API, the latter rule is sometimes violated in some languages. For instance,
|
|
||||||
``Structural.both7and_DConj`` is an atom, but its linearization is
|
|
||||||
two words e.g. //both - and//.
|
|
||||||
|
|
||||||
Another characterization of lexical is that lexical units can be added
|
|
||||||
almost //ad libitum//, and they cannot be defined in terms of already
|
|
||||||
given rules. The lexical modules of the resource API are thus more like
|
|
||||||
samples than complete lists. There are two such modules:
|
|
||||||
- ``Structural``: structural words (determiners, conjunctions,...)
|
|
||||||
- ``Lexicon``: basic everyday content words (nouns, verbs,...)
|
|
||||||
|
|
||||||
|
|
||||||
The module ``Structural`` aims for completeness, and is likely to
|
|
||||||
be extended in future releases of the resource. The module ``Lexicon``
|
|
||||||
gives a "random" list of words, which enables testing the syntax.
|
|
||||||
It also provides a check list for morphology, since those words are likely to include
|
|
||||||
most morphological patterns of the language.
|
|
||||||
|
|
||||||
In the case of ``Lexicon`` it may come out clearer than anywhere else
|
|
||||||
in the API that it is impossible to give exact translation equivalents in
|
|
||||||
different languages on the level of a resource grammar. This is no problem,
|
|
||||||
since application grammars can use the resource in different ways for
|
|
||||||
different languages.
|
|
||||||
|
|
||||||
|
|
||||||
==Language-dependent syntax modules==
|
|
||||||
|
|
||||||
In addition to the common API, there is room for language-dependent extensions
|
|
||||||
of the resource. The top level of each language looks as follows (with German
|
|
||||||
as example):
|
|
||||||
```
|
|
||||||
abstract AllGerAbs = Lang, ExtraGerAbs, IrregGerAbs
|
|
||||||
```
|
|
||||||
where ``ExtraGerAbs`` is a collection of syntactic structures specific to German,
|
|
||||||
and ``IrregGerAbs`` is a dictionary of irregular words of German
|
|
||||||
(at the moment, just verbs). Each of these language-specific grammars has
|
|
||||||
the potential to grow into a full-scale grammar of the language. These grammars
|
|
||||||
can also be used as libraries, but the possibility of using functors is lost.
|
|
||||||
|
|
||||||
To give a better overview of language-specific structures,
|
|
||||||
modules like ``ExtraGerAbs``
|
|
||||||
are built from a language-independent module ``ExtraAbs``
|
|
||||||
by restricted inheritance:
|
|
||||||
```
|
|
||||||
abstract ExtraGerAbs = Extra [f,g,...]
|
|
||||||
```
|
|
||||||
Thus any category and function in ``Extra`` may be shared by a subset of all
|
|
||||||
languages. One can see this set-up as a matrix, which tells
|
|
||||||
what ``Extra`` structures
|
|
||||||
are implemented in what languages. For the common API in ``Grammar``, the matrix
|
|
||||||
is filled with 1's (everything is implemented in every language).
|
|
||||||
|
|
||||||
In a minimal resource grammar implementation, the language-dependent
|
|
||||||
extensions are just empty modules, but it is good to provide them for
|
|
||||||
the sake of uniformity.
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
===The present-tense fragment===
|
|
||||||
|
|
||||||
Some lines in the resource library are suffixed with the comment
|
|
||||||
```
|
|
||||||
--# notpresent
|
|
||||||
```
|
|
||||||
which is used by a preprocessor to exclude those lines from
|
|
||||||
a reduced version of the full resource. This present-tense-only
|
|
||||||
version is useful for applications in most technical text, since
|
|
||||||
it reduces the grammar size and compilation time. It can also
|
|
||||||
be useful to exclude those lines in a first version of resource
|
|
||||||
implementation. To compile a grammar with present-tense-only, use
|
|
||||||
```
|
|
||||||
make Present
|
|
||||||
```
|
|
||||||
with ``resource/Makefile``.
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
==Phases of the work==
|
|
||||||
|
|
||||||
===Putting up a directory===
|
|
||||||
|
|
||||||
Unless you are writing an instance of a parametrized implementation
|
|
||||||
(Romance or Scandinavian), which will be covered later, the
|
|
||||||
simplest way is to follow roughly the following procedure. Assume you
|
|
||||||
are building a grammar for the German language. Here are the first steps,
|
|
||||||
which we actually followed ourselves when building the German implementation
|
|
||||||
of resource v. 1.0 on Ubuntu Linux. We have slightly modified them to
|
|
||||||
match resource v. 1.5 and GF v. 3.0.
|
|
||||||
|
|
||||||
+ Create a sister directory for ``GF/lib/resource/english``, named
|
|
||||||
``german``.
|
|
||||||
```
|
|
||||||
cd GF/lib/resource/
|
|
||||||
mkdir german
|
|
||||||
cd german
|
|
||||||
```
|
|
||||||
|
|
||||||
+ Check out the [ISO 639 3-letter language code
|
|
||||||
http://www.w3.org/WAI/ER/IG/ert/iso639.htm]
|
|
||||||
for German: both ``Ger`` and ``Deu`` are given, and we pick ``Ger``.
|
|
||||||
(We use the 3-letter codes rather than the more common 2-letter codes,
|
|
||||||
since they will suffice for many more languages!)
|
|
||||||
|
|
||||||
+ Copy the ``*Eng.gf`` files from ``english`` to ``german``,
|
|
||||||
and rename them:
|
|
||||||
```
|
|
||||||
cp ../english/*Eng.gf .
|
|
||||||
rename 's/Eng/Ger/' *Eng.gf
|
|
||||||
```
|
|
||||||
If you don't have the ``rename`` command, you can use a bash script with ``mv``.
|
|
||||||
|
|
||||||
|
|
||||||
+ Change the ``Eng`` module references to ``Ger`` references
|
|
||||||
in all files:
|
|
||||||
```
|
|
||||||
sed -i 's/English/German/g' *Ger.gf
|
|
||||||
sed -i 's/Eng/Ger/g' *Ger.gf
|
|
||||||
```
|
|
||||||
The first line prevents changing the word ``English``, which appears
|
|
||||||
here and there in comments, to ``Gerlish``. The ``sed`` command syntax
|
|
||||||
may vary depending on your operating system.
|
|
||||||
|
|
||||||
+ This may of course change unwanted occurrences of the
|
|
||||||
string ``Eng`` - verify this by
|
|
||||||
```
|
|
||||||
grep Ger *.gf
|
|
||||||
```
|
|
||||||
But you will have to make lots of manual changes in all files anyway!
|
|
||||||
|
|
||||||
+ Comment out the contents of these files:
|
|
||||||
```
|
|
||||||
sed -i 's/^/--/' *Ger.gf
|
|
||||||
```
|
|
||||||
This will give you a set of templates out of which the grammar
|
|
||||||
will grow as you uncomment and modify the files rule by rule.
|
|
||||||
|
|
||||||
+ In all ``.gf`` files, uncomment the module headers and brackets,
|
|
||||||
leaving the module bodies commented. Unfortunately, there is no
|
|
||||||
simple way to do this automatically (or to avoid commenting these
|
|
||||||
lines in the previous step) - but uncommenting the first
|
|
||||||
and the last lines will actually do the job for many of the files.
|
|
||||||
|
|
||||||
+ Uncomment the contents of the main grammar file:
|
|
||||||
```
|
|
||||||
sed -i 's/^--//' LangGer.gf
|
|
||||||
```
|
|
||||||
|
|
||||||
+ Now you can open the grammar ``LangGer`` in GF:
|
|
||||||
```
|
|
||||||
gf LangGer.gf
|
|
||||||
```
|
|
||||||
You will get lots of warnings on missing rules, but the grammar will compile.
|
|
||||||
|
|
||||||
+ At all the following steps you will now have a valid, but incomplete
|
|
||||||
GF grammar. The GF command
|
|
||||||
```
|
|
||||||
pg -missing
|
|
||||||
```
|
|
||||||
tells you what exactly is missing.
|
|
||||||
|
|
||||||
|
|
||||||
Here is the module structure of ``LangGer``. It has been simplified by leaving out
|
|
||||||
the majority of the phrase category modules. Each of them has the same dependencies
|
|
||||||
as ``VerbGer``, whose complete dependencies are shown as an example.
|
|
||||||
|
|
||||||
[German.png]
|
|
||||||
|
|
||||||
|
|
||||||
===Direction of work===
|
|
||||||
|
|
||||||
The real work starts now. There are many ways to proceed, the most obvious ones being
|
|
||||||
- Top-down: start from the module ``Phrase`` and go down to ``Sentence``, then
|
|
||||||
``Verb``, ``Noun``, and in the end ``Lexicon``. In this way, you are all the time
|
|
||||||
building complete phrases, and adding more content to them as you proceed.
|
|
||||||
**This approach is not recommended**. It is impossible to test the rules if
|
|
||||||
you have no words to apply the constructions to.
|
|
||||||
|
|
||||||
- Bottom-up: set as your first goal to implement ``Lexicon``. To this end, you
|
|
||||||
need to write ``ParadigmsGer``, which in turn needs parts of
|
|
||||||
``MorphoGer`` and ``ResGer``.
|
|
||||||
**This approach is not recommended**. You can get stuck in details of
|
|
||||||
morphology such as irregular words, and you don't have enough grasp about
|
|
||||||
the type system to decide what forms to cover in morphology.
|
|
||||||
|
|
||||||
|
|
||||||
The practical working direction is thus a saw-like motion between the morphological
|
|
||||||
and top-level modules. Here is a possible course of the work that gives enough
|
|
||||||
test data and enough general view at any point:
|
|
||||||
+ Define ``Cat.N`` and the required parameter types in ``ResGer``. As we define
|
|
||||||
```
|
|
||||||
lincat N = {s : Number => Case => Str ; g : Gender} ;
|
|
||||||
```
|
|
||||||
we need the parameter types ``Number``, ``Case``, and ``Gender``. The definition
|
|
||||||
of ``Number`` in [``common/ParamX`` ../lib/resource/common/ParamX.gf]
|
|
||||||
works for German, so we
|
|
||||||
use it and just define ``Case`` and ``Gender`` in ``ResGer``.
|
|
||||||
|
|
||||||
+ Define some cases of ``mkN`` in ``ParadigmsGer``. In this way you can
|
|
||||||
already implement a huge amount of nouns correctly in ``LexiconGer``. Actually
|
|
||||||
just adding the worst-case instance of ``mkN`` (the one taking the most
|
|
||||||
arguments) should suffice for every noun - but,
|
|
||||||
since it is tedious to use, you
|
|
||||||
might proceed to the next step before returning to morphology and defining the
|
|
||||||
real work horse, ``mkN`` taking two forms and a gender.
|
|
||||||
|
|
||||||
+ While doing this, you may want to test the resource independently. Do this by
|
|
||||||
starting the GF shell in the ``resource`` directory, by the commands
|
|
||||||
```
|
|
||||||
> i -retain german/ParadigmsGer
|
|
||||||
> cc -table mkN "Kirche"
|
|
||||||
```
|
|
||||||
|
|
||||||
+ Proceed to determiners and pronouns in
|
|
||||||
``NounGer`` (``DetCN UsePron DetQuant NumSg DefArt IndefArt UseN``) and
|
|
||||||
``StructuralGer`` (``i_Pron this_Quant``). You also need some categories and
|
|
||||||
parameter types. At this point, it is maybe not possible to find out the final
|
|
||||||
linearization types of ``CN``, ``NP``, ``Det``, and ``Quant``, but at least you should
|
|
||||||
be able to correctly inflect noun phrases such as //every airplane//:
|
|
||||||
```
|
|
||||||
> i german/LangGer.gf
|
|
||||||
> l -table DetCN every_Det (UseN airplane_N)
|
|
||||||
|
|
||||||
Nom: jedes Flugzeug
|
|
||||||
Acc: jedes Flugzeug
|
|
||||||
Dat: jedem Flugzeug
|
|
||||||
Gen: jedes Flugzeugs
|
|
||||||
```
|
|
||||||
|
|
||||||
+ Proceed to verbs: define ``CatGer.V``, ``ResGer.VForm``, and
|
|
||||||
``ParadigmsGer.mkV``. You may choose to exclude ``notpresent``
|
|
||||||
cases at this point. But anyway, you will be able to inflect a good
|
|
||||||
number of verbs in ``Lexicon``, such as
|
|
||||||
``live_V`` (``mkV "leben"``).
|
|
||||||
|
|
||||||
+ Now you can soon form your first sentences: define ``VP`` and
|
|
||||||
``Cl`` in ``CatGer``, ``VerbGer.UseV``, and ``SentenceGer.PredVP``.
|
|
||||||
Even if you have excluded the tenses, you will be able to produce
|
|
||||||
```
|
|
||||||
> i -preproc=./mkPresent german/LangGer.gf
|
|
||||||
> l -table PredVP (UsePron i_Pron) (UseV live_V)
|
|
||||||
|
|
||||||
Pres Simul Pos Main: ich lebe
|
|
||||||
Pres Simul Pos Inv: lebe ich
|
|
||||||
Pres Simul Pos Sub: ich lebe
|
|
||||||
Pres Simul Neg Main: ich lebe nicht
|
|
||||||
Pres Simul Neg Inv: lebe ich nicht
|
|
||||||
Pres Simul Neg Sub: ich nicht lebe
|
|
||||||
```
|
|
||||||
You should also be able to parse:
|
|
||||||
```
|
|
||||||
> p -cat=Cl "ich lebe"
|
|
||||||
PredVP (UsePron i_Pron) (UseV live_V)
|
|
||||||
```
|
|
||||||
|
|
||||||
+ Transitive verbs
|
|
||||||
(``CatGer.V2 CatGer.VPSlash ParadigmsGer.mkV2 VerbGer.ComplSlash VerbGer.SlashV2a``)
|
|
||||||
are a natural next step, so that you can
|
|
||||||
produce ``ich liebe dich`` ("I love you").
|
|
||||||
|
|
||||||
+ Adjectives (``CatGer.A ParadigmsGer.mkA NounGer.AdjCN AdjectiveGer.PositA``)
|
|
||||||
will force you to think about strong and weak declensions, so that you can
|
|
||||||
correctly inflect //mein neuer Wagen, dieser neue Wagen//
|
|
||||||
("my new car, this new car").
|
|
||||||
|
|
||||||
+ Once you have implemented the set
|
|
||||||
(``Noun.DetCN Noun.AdjCN Verb.UseV Verb.ComplSlash Verb.SlashV2a Sentence.PredVP``),
|
|
||||||
you have overcome most of the difficulties. You know roughly what parameters
|
|
||||||
and dependences there are in your language, and you can now proceed very
|
|
||||||
much in the order you please.
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
===The develop-test cycle===
|
|
||||||
|
|
||||||
The following develop-test cycle will
|
|
||||||
be applied most of the time, both in the first steps described above
|
|
||||||
and in later steps where you are more on your own.
|
|
||||||
|
|
||||||
+ Select a phrase category module, e.g. ``NounGer``, and uncomment some
|
|
||||||
linearization rules (for instance, ``DetCN``, as above).
|
|
||||||
|
|
||||||
+ Write down some German examples of this rule, for instance translations
|
|
||||||
of "the dog", "the house", "the big house", etc. Write these in all their
|
|
||||||
different forms (two numbers and four cases).
|
|
||||||
|
|
||||||
+ Think about the categories involved (``CN, NP, N, Det``) and the
|
|
||||||
variations they have. Encode this in the lincats of ``CatGer``.
|
|
||||||
You may have to define some new parameter types in ``ResGer``.
|
|
||||||
|
|
||||||
+ To be able to test the construction,
|
|
||||||
define some words you need to instantiate it
|
|
||||||
in ``LexiconGer``. You will also need some regular inflection patterns
|
|
||||||
in ``ParadigmsGer``.
|
|
||||||
|
|
||||||
+ Test by parsing, linearization,
|
|
||||||
and random generation. In particular, linearization to a table should
|
|
||||||
be used so that you see all forms produced; the ``treebank`` option
|
|
||||||
preserves the tree
|
|
||||||
```
|
|
||||||
> gr -cat=NP -number=20 | l -table -treebank
|
|
||||||
```
|
|
||||||
|
|
||||||
+ Save some tree-linearization pairs for later regression testing. You can save
|
|
||||||
a gold standard treebank and use the Unix ``diff`` command to compare later
|
|
||||||
linearizations produced from the same list of trees. If you save the trees
|
|
||||||
in a file ``trees``, you can do as follows:
|
|
||||||
```
|
|
||||||
> rf -file=trees -tree -lines | l -table -treebank | wf -file=treebank
|
|
||||||
```
|
|
||||||
|
|
||||||
+ A file with trees testing all resource functions is included in the resource,
|
|
||||||
entitled ``resource/exx-resource.gft``. A treebank can be created from this by
|
|
||||||
the Unix command
|
|
||||||
```
|
|
||||||
% runghc Make.hs test langs=Ger
|
|
||||||
```
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
You are likely to run this cycle a few times for each linearization rule
|
|
||||||
you implement, and some hundreds of times altogether. (There are roughly
|
|
||||||
70 ``cat``s and
|
|
||||||
600 ``funs`` in ``Lang`` at the moment; 170 of the ``funs`` are outside the two
|
|
||||||
lexicon modules).
|
|
||||||
|
|
||||||
|
|
||||||
===Auxiliary modules===
|
|
||||||
|
|
||||||
These auxiliary ``resource`` modules will be written by you.
|
|
||||||
|
|
||||||
- ``ResGer``: parameter types and auxiliary operations
|
|
||||||
(a resource for the resource grammar!)
|
|
||||||
- ``ParadigmsGer``: complete inflection engine and most important regular paradigms
|
|
||||||
- ``MorphoGer``: auxiliaries for ``ParadigmsGer`` and ``StructuralGer``. This need
|
|
||||||
not be separate from ``ResGer``.
|
|
||||||
|
|
||||||
|
|
||||||
These modules are language-independent and provided by the existing resource
|
|
||||||
package.
|
|
||||||
|
|
||||||
- ``ParamX``: parameter types used in many languages
|
|
||||||
- ``CommonX``: implementation of language-uniform categories
|
|
||||||
such as ``Text`` and ``Phr``, as well as of
|
|
||||||
the logical tense, anteriority, and polarity parameters
|
|
||||||
- ``Coordination``: operations to deal with lists and coordination
|
|
||||||
- ``Prelude``: general-purpose operations on strings, records,
|
|
||||||
truth values, etc.
|
|
||||||
- ``Predef``: general-purpose operations with hard-coded definitions
|
|
||||||
|
|
||||||
|
|
||||||
An important decision is what rules to implement in terms of operations in
|
|
||||||
``ResGer``. The **golden rule of functional programming** says:
|
|
||||||
- //Whenever you find yourself programming by copy and paste, write a function instead!//.
|
|
||||||
|
|
||||||
|
|
||||||
This rule suggests that an operation should be created if it is to be
|
|
||||||
used at least twice. At the same time, a sound principle of **vicinity** says:
|
|
||||||
- //It should not require too much browsing to understand what a piece of code does.//
|
|
||||||
|
|
||||||
|
|
||||||
From these two principles, we have derived the following practice:
|
|
||||||
- If an operation is needed //in two different modules//,
|
|
||||||
it should be created as an ``oper`` in ``ResGer``. An example is ``mkClause``,
|
|
||||||
used in ``Sentence``, ``Question``, and ``Relative``.
|
|
||||||
- If an operation is needed //twice in the same module//, but never
|
|
||||||
outside, it should be created in the same module. Many examples are
|
|
||||||
found in ``Numeral``.
|
|
||||||
- If an operation is needed //twice in the same judgement//, but never
|
|
||||||
outside, it should be created by a ``let`` definition.
|
|
||||||
- If an operation is only needed once, it should not be created as an ``oper``,
|
|
||||||
but rather inlined. However, a ``let`` definition may well be in place just
|
|
||||||
to make the code readable.
|
|
||||||
Most functions in phrase category modules
|
|
||||||
are implemented in this way.
|
|
||||||
|
|
||||||
|
|
||||||
This discipline is very different from the one followed in early
|
|
||||||
versions of the library (up to 0.9). We then valued the principle of
|
|
||||||
abstraction more than vicinity, creating layers of abstraction for
|
|
||||||
almost everything. This led in practice to the duplication of almost
|
|
||||||
all code on the ``lin`` and ``oper`` levels, and made the code
|
|
||||||
hard to understand and maintain.
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
===Morphology and lexicon===
|
|
||||||
|
|
||||||
The paradigms needed to implement
|
|
||||||
``LexiconGer`` are defined in
|
|
||||||
``ParadigmsGer``.
|
|
||||||
This module provides high-level ways to define the linearization of
|
|
||||||
lexical items, of categories ``N, A, V`` and their complement-taking
|
|
||||||
variants.
|
|
||||||
|
|
||||||
For ease of use, the ``Paradigms`` modules follow a certain
|
|
||||||
naming convention. Thus they provide, for each lexical category, such as ``N``,
|
|
||||||
the overloaded functions, such as ``mkN``, with the following cases:
|
|
||||||
|
|
||||||
- the worst-case construction of ``N``. Its type signature
|
|
||||||
has the form
|
|
||||||
```
|
|
||||||
mkN : Str -> ... -> Str -> P -> ... -> Q -> N
|
|
||||||
```
|
|
||||||
with as many string and parameter arguments as can ever be needed to
|
|
||||||
construct an ``N``.
|
|
||||||
- the most regular cases, with just one string argument:
|
|
||||||
```
|
|
||||||
mkN : Str -> N
|
|
||||||
```
|
|
||||||
- A language-dependent (small) set of functions to handle mild irregularities
|
|
||||||
and common exceptions.
|
|
||||||
|
|
||||||
|
|
||||||
For the complement-taking variants, such as ``V2``, we provide
|
|
||||||
- a case that takes a ``V`` and all necessary arguments, such
|
|
||||||
as case and preposition:
|
|
||||||
```
|
|
||||||
mkV2 : V -> Case -> Str -> V2 ;
|
|
||||||
```
|
|
||||||
- a case that takes a ``Str`` and produces a transitive verb with the direct
|
|
||||||
object case:
|
|
||||||
```
|
|
||||||
mkV2 : Str -> V2 ;
|
|
||||||
```
|
|
||||||
- A language-dependent (small) set of functions to handle common special cases,
|
|
||||||
such as transitive verbs that are not regular:
|
|
||||||
```
|
|
||||||
mkV2 : V -> V2 ;
|
|
||||||
```
|
|
||||||
|
|
||||||
|
|
||||||
The golden rule for the design of paradigms is that
|
|
||||||
- //The user of the library will only need function applications with constants and strings, never any records or tables.//
|
|
||||||
|
|
||||||
|
|
||||||
The discipline of data abstraction moreover requires that the user of the resource
|
|
||||||
is not given access to parameter constructors, but only to constants that denote
|
|
||||||
them. This gives the resource grammarian the freedom to change the underlying
|
|
||||||
data representation if needed. It means that the ``ParadigmsGer`` module has
|
|
||||||
to define constants for those parameter types and constructors that
|
|
||||||
the application grammarian may need to use, e.g.
|
|
||||||
```
|
|
||||||
oper
|
|
||||||
Case : Type ;
|
|
||||||
nominative, accusative, genitive, dative : Case ;
|
|
||||||
```
|
|
||||||
These constants are defined in terms of parameter types and constructors
|
|
||||||
in ``ResGer`` and ``MorphoGer``, which modules are not
|
|
||||||
visible to the application grammarian.
|
|
||||||
|
|
||||||
|
|
||||||
===Lock fields===
|
|
||||||
|
|
||||||
An important difference between ``MorphoGer`` and
|
|
||||||
``ParadigmsGer`` is that the former uses "raw" record types
|
|
||||||
for word classes, whereas the latter uses category symbols defined in
|
|
||||||
``CatGer``. When these category symbols are used to denote
|
|
||||||
record types in a resource module, such as ``ParadigmsGer``,
|
|
||||||
a **lock field** is added to the record, so that categories
|
|
||||||
with the same implementation are not confused with each other.
|
|
||||||
(This is inspired by the ``newtype`` discipline in Haskell.)
|
|
||||||
For instance, the lincats of adverbs and conjunctions are the same
|
|
||||||
in ``CommonX`` (and therefore in ``CatGer``, which inherits it):
|
|
||||||
```
|
|
||||||
lincat Adv = {s : Str} ;
|
|
||||||
lincat Conj = {s : Str} ;
|
|
||||||
```
|
|
||||||
But when these category symbols are used to denote their linearization
|
|
||||||
types in a resource module, these definitions are translated to
|
|
||||||
```
|
|
||||||
oper Adv : Type = {s : Str ; lock_Adv : {}} ;
|
|
||||||
oper Conj : Type = {s : Str ; lock_Conj : {}} ;
|
|
||||||
```
|
|
||||||
In this way, the user of a resource grammar cannot confuse adverbs with
|
|
||||||
conjunctions. In other words, the lock fields force the type checker
|
|
||||||
to function as a grammaticality checker.
|
|
||||||
|
|
||||||
When the resource grammar is ``open``ed in an application grammar, the
|
|
||||||
lock fields are never seen (except possibly in type error messages),
|
|
||||||
and the application grammarian should never write them herself. If she
|
|
||||||
has to do this, it is a sign that the resource grammar is incomplete, and
|
|
||||||
the proper way to proceed is to fix the resource grammar.
|
|
||||||
|
|
||||||
The resource grammarian has to provide the dummy lock field values
|
|
||||||
in her hidden definitions of constants in ``Paradigms``. For instance,
|
|
||||||
```
|
|
||||||
mkAdv : Str -> Adv ;
|
|
||||||
-- mkAdv s = {s = s ; lock_Adv = <>} ;
|
|
||||||
```
|
|
||||||
|
|
||||||
|
|
||||||
===Lexicon construction===
|
|
||||||
|
|
||||||
The lexicon belonging to ``LangGer`` consists of two modules:
|
|
||||||
- ``StructuralGer``, structural words, built by using both
|
|
||||||
``ParadigmsGer`` and ``MorphoGer``.
|
|
||||||
- ``LexiconGer``, content words, built by using ``ParadigmsGer`` only.
|
|
||||||
|
|
||||||
|
|
||||||
The reason why ``MorphoGer`` has to be used in ``StructuralGer``
|
|
||||||
is that ``ParadigmsGer`` does not contain constructors for closed
|
|
||||||
word classes such as pronouns and determiners. The reason why we
|
|
||||||
recommend ``ParadigmsGer`` for building ``LexiconGer`` is that
|
|
||||||
the coverage of the paradigms gets thereby tested and that the
|
|
||||||
use of the paradigms in ``LexiconGer`` gives a good set of examples for
|
|
||||||
those who want to build new lexica.
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
==Lexicon extension==
|
|
||||||
|
|
||||||
===The irregularity lexicon===
|
|
||||||
|
|
||||||
It is useful in most languages to provide a separate module of irregular
|
|
||||||
verbs and other words which are difficult for a lexicographer
|
|
||||||
to handle. There are usually a limited number of such words - a
|
|
||||||
few hundred perhaps. Building such a lexicon separately also
|
|
||||||
makes it less important to cover //everything// by the
|
|
||||||
worst-case variants of the paradigms ``mkV`` etc.
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
===Lexicon extraction from a word list===
|
|
||||||
|
|
||||||
You can often find resources such as lists of
|
|
||||||
irregular verbs on the internet. For instance, the
|
|
||||||
Irregular German Verb page
|
|
||||||
previously found in
|
|
||||||
``http://www.iee.et.tu-dresden.de/~wernerr/grammar/verben_dt.html``
|
|
||||||
gives a list of verbs in the
|
|
||||||
traditional tabular format, which begins as follows:
|
|
||||||
```
|
|
||||||
backen (du bäckst, er bäckt) backte [buk] gebacken
|
|
||||||
befehlen (du befiehlst, er befiehlt; befiehl!) befahl (beföhle; befähle) befohlen
|
|
||||||
beginnen begann (begönne; begänne) begonnen
|
|
||||||
beißen biß gebissen
|
|
||||||
```
|
|
||||||
All you have to do is to write a suitable verb paradigm
|
|
||||||
```
|
|
||||||
irregV : (x1,_,_,_,_,x6 : Str) -> V ;
|
|
||||||
```
|
|
||||||
and a Perl or Python or Haskell script that transforms
|
|
||||||
the table to
|
|
||||||
```
|
|
||||||
backen_V = irregV "backen" "bäckt" "back" "backte" "backte" "gebacken" ;
|
|
||||||
befehlen_V = irregV "befehlen" "befiehlt" "befiehl" "befahl" "beföhle" "befohlen" ;
|
|
||||||
```
|
|
||||||
|
|
||||||
When using ready-made word lists, you should think about
|
|
||||||
copyright issues. All resource grammar material should
|
|
||||||
be provided under GNU Lesser General Public License (LGPL).
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
===Lexicon extraction from raw text data===
|
|
||||||
|
|
||||||
This is a cheap technique to build a lexicon of thousands
|
|
||||||
of words, if text data is available in digital format.
|
|
||||||
See the [Extract Homepage http://www.cs.chalmers.se/~markus/extract/]
|
|
||||||
for details.
|
|
||||||
|
|
||||||
|
|
||||||
===Bootstrapping with smart paradigms===
|
|
||||||
|
|
||||||
This is another cheap technique, where you need as input a list of words with
|
|
||||||
part-of-speech marking. You initialize the lexicon by using the one-argument
|
|
||||||
``mkN`` etc paradigms, and add forms to those words that do not come out right.
|
|
||||||
This procedure is described in the paper
|
|
||||||
|
|
||||||
A. Ranta.
|
|
||||||
How predictable is Finnish morphology? An experiment on lexicon construction.
|
|
||||||
In J. Nivre, M. Dahllöf and B. Megyesi (eds),
|
|
||||||
//Resourceful Language Technology: Festschrift in Honor of Anna Sågvall Hein//,
|
|
||||||
University of Uppsala,
|
|
||||||
2008.
|
|
||||||
Available from the [series homepage http://publications.uu.se/abstract.xsql?dbid=8933]
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
==Extending the resource grammar API==
|
|
||||||
|
|
||||||
Sooner or later it will happen that the resource grammar API
|
|
||||||
does not suffice for all applications. A common reason is
|
|
||||||
that it does not include idiomatic expressions in a given language.
|
|
||||||
The solution then is in the first place to build language-specific
|
|
||||||
extension modules, like ``ExtraGer``.
|
|
||||||
|
|
||||||
==Using parametrized modules==
|
|
||||||
|
|
||||||
===Writing an instance of parametrized resource grammar implementation===
|
|
||||||
|
|
||||||
Above we have looked at how a resource implementation is built by
|
|
||||||
the copy and paste method (from English to German), that is, formally
|
|
||||||
speaking, from scratch. A more elegant solution available for
|
|
||||||
families of languages such as Romance and Scandinavian is to
|
|
||||||
use parametrized modules. The advantages are
|
|
||||||
- theoretical: linguistic generalizations and insights
|
|
||||||
- practical: maintainability improves with fewer components
|
|
||||||
|
|
||||||
|
|
||||||
Here is a set of
|
|
||||||
[slides http://www.cs.chalmers.se/~aarne/geocal2006.pdf]
|
|
||||||
on the topic.
|
|
||||||
|
|
||||||
|
|
||||||
===Parametrizing a resource grammar implementation===
|
|
||||||
|
|
||||||
This is the most demanding form of resource grammar writing.
|
|
||||||
We do //not// recommend the method of parametrizing from the
|
|
||||||
beginning: it is easier to have one language first implemented
|
|
||||||
in the conventional way and then add another language of the
|
|
||||||
same family by parametrization. This means that the copy and
|
|
||||||
paste method is still used, but this time the differences
|
|
||||||
are put into an ``interface`` module.
|
|
||||||
|
|
||||||
|
|
||||||
==Character encoding and transliterations==
|
|
||||||
|
|
||||||
This section is relevant for languages using a non-ASCII character set.
|
|
||||||
|
|
||||||
==Coding conventions in GF==
|
|
||||||
|
|
||||||
From version 3.0, GF follows a simple encoding convention:
|
|
||||||
- GF source files may follow any encoding, such as isolatin-1 or UTF-8;
|
|
||||||
the default is isolatin-1, and UTF-8 must be indicated by the judgement
|
|
||||||
```
|
|
||||||
flags coding = utf8 ;
|
|
||||||
```
|
|
||||||
in each source module.
|
|
||||||
- for internal processing, all characters are converted to 16-bit Unicode,
|
|
||||||
as the first step of grammar compilation guided by the ``coding`` flag
|
|
||||||
- as the last step of compilation, all characters are converted to UTF-8
|
|
||||||
- thus, GF object files (``gfo``) and the Portable Grammar Format (``pgf``)
|
|
||||||
are in UTF-8
|
|
||||||
|
|
||||||
|
|
||||||
Most current resource grammars use isolatin-1 in the source, but this does
|
|
||||||
not affect their use in parallel with grammars written in other encodings.
|
|
||||||
In fact, a grammar can be put together from modules using different codings.
|
|
||||||
|
|
||||||
**Warning**. While string literals may contain any characters, identifiers
|
|
||||||
must be isolatin-1 letters (or digits, underscores, or dashes). This has to
|
|
||||||
do with the restrictions of the lexer tool that is used.
|
|
||||||
|
|
||||||
|
|
||||||
==Transliterations==
|
|
||||||
|
|
||||||
While UTF-8 is well supported by most web browsers, its use in terminals and
|
|
||||||
text editors may cause disappointment. Many grammarians therefore prefer to
|
|
||||||
use ASCII transliterations. GF 3.0beta2 provides the following built-in
|
|
||||||
transliterations:
|
|
||||||
- Arabic
|
|
||||||
- Devanagari (Hindi)
|
|
||||||
- Thai
|
|
||||||
|
|
||||||
|
|
||||||
New transliterations can be defined in the GF source file
|
|
||||||
[``GF/Text/Transliterations.hs`` ../src/GF/Text/Transliterations.hs].
|
|
||||||
This file also gives instructions on how new ones are added.
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Before Width: | Height: | Size: 102 KiB |
1497
deprecated/config.guess
vendored
@@ -1,37 +0,0 @@
|
|||||||
# GF configuration file. configure will produce config.mk from this file
|
|
||||||
# @configure_input@
|
|
||||||
|
|
||||||
PACKAGE_VERSION = @PACKAGE_VERSION@
|
|
||||||
|
|
||||||
prefix = @prefix@
|
|
||||||
exec_prefix = @exec_prefix@
|
|
||||||
bindir = @bindir@
|
|
||||||
libdir = @libdir@
|
|
||||||
datadir = @datadir@
|
|
||||||
|
|
||||||
host = @host@
|
|
||||||
build = @build@
|
|
||||||
|
|
||||||
GHCFLAGS = @GHCFLAGS@
|
|
||||||
CPPFLAGS = @CPPFLAGS@
|
|
||||||
LDFLAGS = @LDFLAGS@
|
|
||||||
|
|
||||||
EXEEXT = @EXEEXT@
|
|
||||||
|
|
||||||
INSTALL = @INSTALL@
|
|
||||||
TAR = @TAR@
|
|
||||||
|
|
||||||
GHC = "@GHC@"
|
|
||||||
GHCI = "@GHCI@"
|
|
||||||
|
|
||||||
READLINE = @READLINE@
|
|
||||||
|
|
||||||
INTERRUPT = @INTERRUPT@
|
|
||||||
|
|
||||||
ATK = @ATK@
|
|
||||||
|
|
||||||
ENABLE_JAVA = @ENABLE_JAVA@
|
|
||||||
|
|
||||||
JAVAC = "@JAVAC@"
|
|
||||||
JAR = "@JAR@"
|
|
||||||
|
|
||||||
1608
deprecated/config.sub
vendored
@@ -1,229 +0,0 @@
|
|||||||
dnl Run autoconf to generate configure from this file
|
|
||||||
|
|
||||||
AC_INIT([GF],[3.0-beta3],[aarne@cs.chalmers.se],[GF])
|
|
||||||
|
|
||||||
AC_PREREQ(2.53)
|
|
||||||
|
|
||||||
AC_REVISION($Revision: 1.26 $)
|
|
||||||
|
|
||||||
AC_CONFIG_FILES([config.mk gfc])
|
|
||||||
|
|
||||||
AC_CANONICAL_HOST
|
|
||||||
|
|
||||||
dnl ***********************************************
|
|
||||||
dnl Executable suffix
|
|
||||||
dnl ***********************************************
|
|
||||||
|
|
||||||
|
|
||||||
AC_MSG_CHECKING([executable suffix])
|
|
||||||
case $host_os in
|
|
||||||
cygwin)
|
|
||||||
EXEEXT='.exe';;
|
|
||||||
*)
|
|
||||||
EXEEXT='';;
|
|
||||||
esac
|
|
||||||
AC_MSG_RESULT(['$EXEEXT'])
|
|
||||||
AC_SUBST(EXEEXT)
|
|
||||||
|
|
||||||
dnl ***********************************************
|
|
||||||
dnl GHC
|
|
||||||
dnl ***********************************************
|
|
||||||
|
|
||||||
AC_ARG_WITH(ghc,
|
|
||||||
AC_HELP_STRING([--with-ghc=<ghc command>],
|
|
||||||
[Use a different command instead of
|
|
||||||
'ghc' for the Haskell compiler.]),
|
|
||||||
[AC_CHECK_FILE("$withval",GHC="$withval",[AC_PATH_PROG(GHC,"$withval")])],
|
|
||||||
[AC_PATH_PROG(GHC,ghc)])
|
|
||||||
|
|
||||||
GHCI=$(dirname $GHC)/ghci
|
|
||||||
|
|
||||||
GHC_VERSION=`$GHC --version | sed -e 's/.*version //'`
|
|
||||||
AC_MSG_CHECKING([GHC version])
|
|
||||||
AC_MSG_RESULT($GHC_VERSION)
|
|
||||||
|
|
||||||
|
|
||||||
AC_SUBST(GHC)
|
|
||||||
AC_SUBST(GHCI)
|
|
||||||
|
|
||||||
dnl ***********************************************
|
|
||||||
dnl readline
|
|
||||||
dnl ***********************************************
|
|
||||||
|
|
||||||
AC_ARG_WITH(readline,
|
|
||||||
AC_HELP_STRING([--with-readline=<readline alternative>],
|
|
||||||
[Select which readline implementation to use.
|
|
||||||
Available alternatives are: 'readline' (GNU readline),
|
|
||||||
'no' (don't use readline)
|
|
||||||
(default = readline)]),
|
|
||||||
[if test "$withval" = "yes"; then
|
|
||||||
READLINE="readline"
|
|
||||||
else
|
|
||||||
READLINE="$withval"
|
|
||||||
fi],
|
|
||||||
[if test "$host_os" = "cygwin"; then
|
|
||||||
AC_MSG_WARN([There are problems with readline for Windows,
|
|
||||||
for example, pipe characters do not work.
|
|
||||||
Disabling readline support.
|
|
||||||
Use --with-readline to override.])
|
|
||||||
READLINE="no"
|
|
||||||
else
|
|
||||||
READLINE="readline"
|
|
||||||
fi])
|
|
||||||
|
|
||||||
case $READLINE in
|
|
||||||
readline)
|
|
||||||
;;
|
|
||||||
no)
|
|
||||||
;;
|
|
||||||
*)
|
|
||||||
AC_MSG_ERROR([Bad value for --with-readline: $READLINE])
|
|
||||||
;;
|
|
||||||
esac
|
|
||||||
|
|
||||||
AC_SUBST(READLINE)
|
|
||||||
|
|
||||||
dnl ***********************************************
|
|
||||||
dnl command interruption
|
|
||||||
dnl ***********************************************
|
|
||||||
|
|
||||||
AC_ARG_WITH(interrupt,
|
|
||||||
AC_HELP_STRING([--with-interrupt=<allow command interruption>],
|
|
||||||
[Choose whether to enable interruption of commands
|
|
||||||
with SIGINT (Ctrl-C)
|
|
||||||
Available alternatives are: 'yes', 'no'
|
|
||||||
(default = yes)]),
|
|
||||||
[INTERRUPT="$withval"],
|
|
||||||
[if test "$host_os" = "cygwin"; then
|
|
||||||
AC_MSG_WARN([Command interruption does not work under
|
|
||||||
Cygwin, because of missing signal handler support.
|
|
||||||
Disabling command interruption support.
|
|
||||||
Use --with-interrupt to override.])
|
|
||||||
INTERRUPT="no"
|
|
||||||
else
|
|
||||||
INTERRUPT="yes"
|
|
||||||
fi])
|
|
||||||
|
|
||||||
case $INTERRUPT in
|
|
||||||
yes)
|
|
||||||
;;
|
|
||||||
no)
|
|
||||||
;;
|
|
||||||
*)
|
|
||||||
AC_MSG_ERROR([Bad value for --with-interrupt: $INTERRUPT])
|
|
||||||
;;
|
|
||||||
esac
|
|
||||||
|
|
||||||
AC_SUBST(INTERRUPT)
|
|
||||||
|
|
||||||
dnl ***********************************************
|
|
||||||
dnl ATK speech recognition
|
|
||||||
dnl ***********************************************
|
|
||||||
|
|
||||||
AC_ARG_WITH(atk,
|
|
||||||
AC_HELP_STRING([--with-atk=<use ATK speech recognition>],
|
|
||||||
[Choose whether to compile in support for speech
|
|
||||||
recognition using ATK. Requires ATK and libatkrec.
|
|
||||||
Available alternatives are: 'yes', 'no'
|
|
||||||
(default = no)]),
|
|
||||||
[ATK="$withval"],
|
|
||||||
[ATK="no"])
|
|
||||||
|
|
||||||
case $ATK in
|
|
||||||
yes)
|
|
||||||
AC_MSG_CHECKING([for atkrec package])
|
|
||||||
ATKREC_VERSION=`ghc-pkg field atkrec version`
|
|
||||||
if test "$ATKREC_VERSION" = ""; then
|
|
||||||
AC_MSG_RESULT(['not found'])
|
|
||||||
AC_MSG_WARN([Disabling ATK support.])
|
|
||||||
ATK="no"
|
|
||||||
else
|
|
||||||
AC_MSG_RESULT([$ATKREC_VERSION])
|
|
||||||
fi
|
|
||||||
;;
|
|
||||||
no)
|
|
||||||
;;
|
|
||||||
*)
|
|
||||||
AC_MSG_ERROR([Bad value for --with-atk: $ATK])
|
|
||||||
|
|
||||||
;;
|
|
||||||
esac
|
|
||||||
|
|
||||||
AC_SUBST(ATK)
|
|
||||||
|
|
||||||
dnl ***********************************************
|
|
||||||
dnl java stuff
|
|
||||||
dnl ***********************************************
|
|
||||||
|
|
||||||
AC_ARG_ENABLE(java,
|
|
||||||
AC_HELP_STRING([--enable-java],
|
|
||||||
[Build Java components. (default = yes)]),
|
|
||||||
[ENABLE_JAVA="$enableval"],
|
|
||||||
[ENABLE_JAVA=yes]
|
|
||||||
)
|
|
||||||
|
|
||||||
if test "$ENABLE_JAVA" = "yes"; then
|
|
||||||
|
|
||||||
AC_ARG_WITH(javac,
|
|
||||||
AC_HELP_STRING([--with-javac=<javac command>],
|
|
||||||
[Use a different command instead of
|
|
||||||
'javac' for the Java compiler.]),
|
|
||||||
[AC_CHECK_FILE("$withval",JAVAC="$withval",[AC_PATH_PROG(JAVAC,"$withval")])],
|
|
||||||
[AC_PATH_PROG(JAVAC,javac)])
|
|
||||||
AC_SUBST(JAVAC)
|
|
||||||
|
|
||||||
AC_ARG_WITH(java,
|
|
||||||
AC_HELP_STRING([--with-java=<java command>],
|
|
||||||
[Use a different command instead of
|
|
||||||
'java' for the Java Virtual Machine.]),
|
|
||||||
[AC_CHECK_FILE("$withval",JAVA="$withval",[AC_PATH_PROG(JAVA,"$withval")])],
|
|
||||||
[AC_PATH_PROG(JAVA,java)])
|
|
||||||
AC_SUBST(JAVA)
|
|
||||||
|
|
||||||
AC_ARG_WITH(jar,
|
|
||||||
AC_HELP_STRING([--with-jar=<jar command>],
|
|
||||||
[Use a different command instead of
|
|
||||||
'jar' for the Java archive tool.]),
|
|
||||||
[AC_CHECK_FILE("$withval",JAR="$withval",[AC_PATH_PROG(JAR,"$withval")])],
|
|
||||||
[AC_PATH_PROG(JAR,jar)])
|
|
||||||
AC_SUBST(JAR)
|
|
||||||
|
|
||||||
if test "$JAVAC" = "" || test ! -x "$JAVAC" \
|
|
||||||
|| test "$JAVA" = "" || test ! -x "$JAVA" \
|
|
||||||
|| test "$JAR" = "" || test ! -x "$JAR"; then
|
|
||||||
|
|
||||||
AC_MSG_WARN([Not building Java components.])
|
|
||||||
ENABLE_JAVA=no
|
|
||||||
fi
|
|
||||||
|
|
||||||
fi
|
|
||||||
|
|
||||||
AC_SUBST(ENABLE_JAVA)
|
|
||||||
|
|
||||||
|
|
||||||
dnl ***********************************************
|
|
||||||
dnl TAR
|
|
||||||
dnl ***********************************************
|
|
||||||
|
|
||||||
AC_CHECK_PROGS(TAR, gtar tar)
|
|
||||||
|
|
||||||
dnl ***********************************************
|
|
||||||
dnl Other programs
|
|
||||||
dnl ***********************************************
|
|
||||||
|
|
||||||
AC_PROG_INSTALL
|
|
||||||
|
|
||||||
dnl ***********************************************
|
|
||||||
dnl Program flags
|
|
||||||
dnl ***********************************************
|
|
||||||
|
|
||||||
AC_SUBST(GHCFLAGS)
|
|
||||||
AC_SUBST(CPPFLAGS)
|
|
||||||
AC_SUBST(LDFLAGS)
|
|
||||||
|
|
||||||
dnl ***********************************************
|
|
||||||
dnl Output
|
|
||||||
dnl ***********************************************
|
|
||||||
|
|
||||||
AC_OUTPUT
|
|
||||||
|
|
||||||
@@ -1,259 +0,0 @@
|
|||||||
<html>
|
|
||||||
<HEAD><META http-equiv=Content-Type content="text/html; charset=utf-8"></HEAD>
|
|
||||||
<body>
|
|
||||||
af_tunni : lámma kún síddi? boqól afartón i ków
|
|
||||||
|
|
||||||
<p>
|
|
||||||
albanian : dy mijë tre qind e dyzet e një
|
|
||||||
|
|
||||||
<p>
|
|
||||||
amharic : ሁለት ሺህ ሦስት መቶ ኣርባ ኣንድ
|
|
||||||
|
|
||||||
<p>
|
|
||||||
arabic_classical : الفان و ثلاث مائة و واحد و أربعون
|
|
||||||
|
|
||||||
<p>
|
|
||||||
arabic_modern : ﺍﻟﻔﻴﻦ ﻭ ﺛﻼﺛﻤﺎﺋﺔ ﻭ ﻭﺍﺣﺪ ﻭ ﺃﺭﺑﻌﻴﻦ
|
|
||||||
|
|
||||||
<p>
|
|
||||||
basque : bi mila ta hirurehun berrogei ta bat
|
|
||||||
|
|
||||||
<p>
|
|
||||||
bearlake_slave : nákee lamíl tai lak'o, óno, di,i, honéno, ?ó, l-ée
|
|
||||||
|
|
||||||
<p>
|
|
||||||
bulgarian : две жиляди триста четирисет и едно
|
|
||||||
|
|
||||||
<p>
|
|
||||||
catalan : dos mil tres-cents quaranta - u
|
|
||||||
|
|
||||||
<p>
|
|
||||||
chinese : 贰 仟 零 叁 佰 肆 拾 壹
|
|
||||||
|
|
||||||
<p>
|
|
||||||
croatian : dva hiljade tri stotine četrdeset i jedan
|
|
||||||
|
|
||||||
<p>
|
|
||||||
czech : dva tisíce tr^i sta čtyr^icet jeden
|
|
||||||
|
|
||||||
<p>
|
|
||||||
dagur : hoire miange guarebe jau duci neke
|
|
||||||
|
|
||||||
<p>
|
|
||||||
danish : to tusind og tre hundrede og en og fyrre
|
|
||||||
|
|
||||||
<p>
|
|
||||||
decimal : 2341
|
|
||||||
|
|
||||||
<p>
|
|
||||||
dutch : twee duizend drie honderd een en veertig
|
|
||||||
|
|
||||||
<p>
|
|
||||||
english : two thousand three hundred and forty - one
|
|
||||||
|
|
||||||
<p>
|
|
||||||
finnish : kaksi tuhatta kolme sataa neljä kymmentä yksi
|
|
||||||
|
|
||||||
<p>
|
|
||||||
french : deux mille trois cent quarante et un
|
|
||||||
|
|
||||||
<p>
|
|
||||||
french_swiss : deux mille trois cent quarante et un
|
|
||||||
|
|
||||||
<p>
|
|
||||||
fulfulde : ujine d.id.i temed.d.e tati e chappand.e nai e go'o
|
|
||||||
|
|
||||||
<p>
|
|
||||||
geez : ዕሽራ ወ ሠላስቱ ምእት አርብዓ ወ አሐዱ
|
|
||||||
|
|
||||||
<p>
|
|
||||||
german : zwei tausend drei hundert ein und vierzig
|
|
||||||
|
|
||||||
<p>
|
|
||||||
greek_classical : δισχίλιοι τριακόσιοι τετταράκοντα εἵς
|
|
||||||
|
|
||||||
<p>
|
|
||||||
greek_modern : δύο χιλιάδες τριακόσια σαράντα ένα
|
|
||||||
|
|
||||||
<p>
|
|
||||||
guahibo : aniha sunu akueya sia yana bae kae
|
|
||||||
|
|
||||||
<p>
|
|
||||||
guarani : moko~i ma mpohapy sa~ irundy kua~ petei~
|
|
||||||
|
|
||||||
<p>
|
|
||||||
hebrew_biblical : אלפים ו שלש מאות ו ארבעים ו אחד
|
|
||||||
|
|
||||||
<p>
|
|
||||||
hindi : दो हज़ार तीन सौ एक्तालीस
|
|
||||||
|
|
||||||
<p>
|
|
||||||
hungarian : két ezer három száz negyven egy
|
|
||||||
|
|
||||||
<p>
|
|
||||||
icelandic : tvö Þúsund Þrjú hundrað fjörutíu og einn
|
|
||||||
|
|
||||||
<p>
|
|
||||||
irish : dhá mhíle trí chead dhá fhichead a haon
|
|
||||||
|
|
||||||
<p>
|
|
||||||
italian : due mila tre cento quaranta uno
|
|
||||||
|
|
||||||
<p>
|
|
||||||
japanese : にせん さんびゃく よんぢゅう いち
|
|
||||||
|
|
||||||
<p>
|
|
||||||
kabardian : m&yn&yt' s'a&ys' p'L-'&s'ra z&ra
|
|
||||||
|
|
||||||
<p>
|
|
||||||
kambera : dua riu tailu ngahu patu kambulu hau
|
|
||||||
|
|
||||||
<p>
|
|
||||||
kawaiisu : N
|
|
||||||
<p>
|
|
||||||
khmer : bīra bā'na pī raya sē sipa mwya
|
|
||||||
|
|
||||||
<p>
|
|
||||||
khowar : joo hazâr troi shọr oché joo bîsher î
|
|
||||||
|
|
||||||
<p>
|
|
||||||
kodagu : i:ra:yrat mu:nu:yt.a na:padï
|
|
||||||
|
|
||||||
<p>
|
|
||||||
kolyma_yukaghir : N
|
|
||||||
<p>
|
|
||||||
kulung : ni habau su chhum lik i
|
|
||||||
|
|
||||||
<p>
|
|
||||||
kwami : dùbúk póllów dálmágí kúnún kán kúu pòD^òw kán múndí
|
|
||||||
|
|
||||||
<p>
|
|
||||||
kwaza : N
|
|
||||||
<p>
|
|
||||||
lalo : `n. t'w sa há i tjhí tjh`&
|
|
||||||
|
|
||||||
<p>
|
|
||||||
lamani : di hajaar do se caaLise par ek
|
|
||||||
|
|
||||||
<p>
|
|
||||||
latvian : divtu^kstoš trīssimt četrdesmit viens
|
|
||||||
|
|
||||||
<p>
|
|
||||||
lithuanian : dù tú:kstanc^iu, try:s s^imtai~ ke:turiasdes^imt víenas
|
|
||||||
|
|
||||||
<p>
|
|
||||||
lotuxo : tausand ârrexai ikO EssIxa xunixoi ikO atOmwana aNwan x' âbotye
|
|
||||||
|
|
||||||
<p>
|
|
||||||
maale : lam?ó $íya haitsó s'ééta ?oydí-támmi pétte
|
|
||||||
|
|
||||||
<p>
|
|
||||||
malay : dua ribu tiga ratus empat puluh satu
|
|
||||||
|
|
||||||
<p>
|
|
||||||
maltese : elfejn tliet mija u wieh-ed u erbgh-in
|
|
||||||
|
|
||||||
<p>
|
|
||||||
mapuche : epu warangka külá pataka meli mari kiñe
|
|
||||||
|
|
||||||
<p>
|
|
||||||
margi : dúbú s`&d>àN ghàrú mák`&r agá fód>ú kùmì gà s'&r pátlú*
|
|
||||||
|
|
||||||
<p>
|
|
||||||
maybrat : N
|
|
||||||
<p>
|
|
||||||
miya : d'&bu ts`&r '`&náa d>àriy kìdi '`&náa díb>i f`&d>& bèh&n wut'&
|
|
||||||
|
|
||||||
<p>
|
|
||||||
mongolian : qoyar mingGan Gurban ĵa'un döčin nigän
|
|
||||||
|
|
||||||
<p>
|
|
||||||
nenets : side juonar n-ahar jur t-êt ju' ~ob
|
|
||||||
|
|
||||||
<p>
|
|
||||||
norwegian_book : to tusen og tre hundre og førti et
|
|
||||||
|
|
||||||
<p>
|
|
||||||
old_church_slavonic : дъвѣ тысѭшти триѥ съта четыре десѧте и ѥдинъ
|
|
||||||
|
|
||||||
<p>
|
|
||||||
oromo : kuma lama fi dhibba sadii fi afurtamii tokko
|
|
||||||
|
|
||||||
<p>
|
|
||||||
pashto : دوه زره دري سوه او يو څلوۍښت
|
|
||||||
|
|
||||||
<p>
|
|
||||||
polish : dwa tysiace trzysta czterdziesci jeden
|
|
||||||
|
|
||||||
<p>
|
|
||||||
portuguese : dois mil trezentos quarenta e um
|
|
||||||
|
|
||||||
<p>
|
|
||||||
quechua : iskay warank'a kinsa pachak tawa chunka jukniyuq
|
|
||||||
|
|
||||||
<p>
|
|
||||||
romanian : două mii trei sute patruzeci şi unu
|
|
||||||
|
|
||||||
<p>
|
|
||||||
russian : две тысячи триста сорок один
|
|
||||||
|
|
||||||
<p>
|
|
||||||
sango : ngbangbu bale óse na ndó ní ngbangbu otá na ndó ní bale osió na ndó ní ÓkO
|
|
||||||
|
|
||||||
<p>
|
|
||||||
sanskrit : त्रि शतान्य एकचत्वारिंशच च द्वे सहस्रे
|
|
||||||
|
|
||||||
<p>
|
|
||||||
slovak : dva tisic tri sto styridsat jedna
|
|
||||||
|
|
||||||
<p>
|
|
||||||
sorani : دۇ ههزار سىسهد ځل و يهك
|
|
||||||
|
|
||||||
<p>
|
|
||||||
spanish : dos mil trescientos cuarenta y uno
|
|
||||||
|
|
||||||
<p>
|
|
||||||
stieng : baar ban pê riêng puôn jo't muôi
|
|
||||||
|
|
||||||
<p>
|
|
||||||
swahili : elfu mbili mia tatu arobaini na moja
|
|
||||||
|
|
||||||
<p>
|
|
||||||
swedish : två tusen tre hundra fyrtio ett
|
|
||||||
|
|
||||||
<p>
|
|
||||||
tamil : இரணௌடௌ ஆயாரதௌதீ மீனௌ நரீ நரௌ பதௌ ஓனௌரீ
|
|
||||||
|
|
||||||
<p>
|
|
||||||
tampere : kaks tuhatta kolme sataa nel kyt yks
|
|
||||||
|
|
||||||
<p>
|
|
||||||
tibetan : t̆ong ṭ'a' n̆yī d́ang sumğya d́ang z̆hyib chu źhye chi'
|
|
||||||
|
|
||||||
<p>
|
|
||||||
totonac : maa t~u3 mil lii ~a tuhun pus^um tun
|
|
||||||
|
|
||||||
<p>
|
|
||||||
tuda_daza : dubu cu sao kidra ago.zo. sao mOrta tozo sao tro
|
|
||||||
|
|
||||||
<p>
|
|
||||||
tukang_besi : dua riwu tolu hatu hato hulu sa'asa
|
|
||||||
|
|
||||||
<p>
|
|
||||||
turkish : iki bin üç yüz kırk bir
|
|
||||||
|
|
||||||
<p>
|
|
||||||
votic : kahsi tuhatta keVmsata: nelläts^ümmet ühsi
|
|
||||||
|
|
||||||
<p>
|
|
||||||
welsh : dau fil tri chan un a deugain
|
|
||||||
|
|
||||||
<p>
|
|
||||||
yasin_burushaski : altó hazár iskí tha altó-áltar hek
|
|
||||||
|
|
||||||
<p>
|
|
||||||
zaiwa : i55 hing55 sum11 syo31 mi11 cue31 ra11
|
|
||||||
|
|
||||||
</body>
|
|
||||||
</html>
|
|
||||||
|
|
||||||
@@ -1,569 +0,0 @@
|
|||||||
\batchmode
|
|
||||||
%This Latex file is machine-generated by the BNF-converter
|
|
||||||
|
|
||||||
\documentclass[a4paper,11pt]{article}
|
|
||||||
\author{BNF-converter}
|
|
||||||
\title{The Language GF}
|
|
||||||
\setlength{\parindent}{0mm}
|
|
||||||
\setlength{\parskip}{1mm}
|
|
||||||
\begin{document}
|
|
||||||
|
|
||||||
\maketitle
|
|
||||||
|
|
||||||
\newcommand{\emptyP}{\mbox{$\epsilon$}}
|
|
||||||
\newcommand{\terminal}[1]{\mbox{{\texttt {#1}}}}
|
|
||||||
\newcommand{\nonterminal}[1]{\mbox{$\langle \mbox{{\sl #1 }} \! \rangle$}}
|
|
||||||
\newcommand{\arrow}{\mbox{::=}}
|
|
||||||
\newcommand{\delimit}{\mbox{$|$}}
|
|
||||||
\newcommand{\reserved}[1]{\mbox{{\texttt {#1}}}}
|
|
||||||
\newcommand{\literal}[1]{\mbox{{\texttt {#1}}}}
|
|
||||||
\newcommand{\symb}[1]{\mbox{{\texttt {#1}}}}
|
|
||||||
|
|
||||||
This document was automatically generated by the {\em BNF-Converter}. It was generated together with the lexer, the parser, and the abstract syntax module, which guarantees that the document matches with the implementation of the language (provided no hand-hacking has taken place).
|
|
||||||
|
|
||||||
\section*{The lexical structure of GF}
|
|
||||||
\subsection*{Identifiers}
|
|
||||||
Identifiers \nonterminal{Ident} are unquoted strings beginning with a letter,
|
|
||||||
followed by any combination of letters, digits, and the characters {\tt \_ '},
|
|
||||||
reserved words excluded.
|
|
||||||
|
|
||||||
|
|
||||||
\subsection*{Literals}
|
|
||||||
Integer literals \nonterminal{Int}\ are nonempty sequences of digits.
|
|
||||||
|
|
||||||
|
|
||||||
String literals \nonterminal{String}\ have the form
|
|
||||||
\terminal{"}$x$\terminal{"}, where $x$ is any sequence of any characters
|
|
||||||
except \terminal{"}\ unless preceded by \verb6\6.
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
LString literals are recognized by the regular expression
|
|
||||||
\(\mbox{`''} ({\nonterminal{anychar}} - \mbox{`''})* \mbox{`''}\)
|
|
||||||
|
|
||||||
|
|
||||||
\subsection*{Reserved words and symbols}
|
|
||||||
The set of reserved words is the set of terminals appearing in the grammar. Those reserved words that consist of non-letter characters are called symbols, and they are treated in a different way from those that are similar to identifiers. The lexer follows rules familiar from languages like Haskell, C, and Java, including longest match and spacing conventions.
|
|
||||||
|
|
||||||
The reserved words used in GF are the following: \\
|
|
||||||
|
|
||||||
\begin{tabular}{lll}
|
|
||||||
{\reserved{Lin}} &{\reserved{PType}} &{\reserved{Str}} \\
|
|
||||||
{\reserved{Strs}} &{\reserved{Tok}} &{\reserved{Type}} \\
|
|
||||||
{\reserved{abstract}} &{\reserved{case}} &{\reserved{cat}} \\
|
|
||||||
{\reserved{concrete}} &{\reserved{data}} &{\reserved{def}} \\
|
|
||||||
{\reserved{flags}} &{\reserved{fn}} &{\reserved{fun}} \\
|
|
||||||
{\reserved{grammar}} &{\reserved{in}} &{\reserved{include}} \\
|
|
||||||
{\reserved{incomplete}} &{\reserved{instance}} &{\reserved{interface}} \\
|
|
||||||
{\reserved{let}} &{\reserved{lin}} &{\reserved{lincat}} \\
|
|
||||||
{\reserved{lindef}} &{\reserved{lintype}} &{\reserved{of}} \\
|
|
||||||
{\reserved{open}} &{\reserved{oper}} &{\reserved{out}} \\
|
|
||||||
{\reserved{package}} &{\reserved{param}} &{\reserved{pattern}} \\
|
|
||||||
{\reserved{pre}} &{\reserved{printname}} &{\reserved{resource}} \\
|
|
||||||
{\reserved{reuse}} &{\reserved{strs}} &{\reserved{table}} \\
|
|
||||||
{\reserved{tokenizer}} &{\reserved{transfer}} &{\reserved{union}} \\
|
|
||||||
{\reserved{var}} &{\reserved{variants}} &{\reserved{where}} \\
|
|
||||||
{\reserved{with}} & & \\
|
|
||||||
\end{tabular}\\
|
|
||||||
|
|
||||||
The symbols used in GF are the following: \\
|
|
||||||
|
|
||||||
\begin{tabular}{lll}
|
|
||||||
{\symb{;}} &{\symb{{$=$}}} &{\symb{\{}} \\
|
|
||||||
{\symb{\}}} &{\symb{(}} &{\symb{)}} \\
|
|
||||||
{\symb{:}} &{\symb{{$-$}{$>$}}} &{\symb{**}} \\
|
|
||||||
{\symb{,}} &{\symb{[}} &{\symb{]}} \\
|
|
||||||
{\symb{.}} &{\symb{{$|$}}} &{\symb{\%}} \\
|
|
||||||
{\symb{?}} &{\symb{{$<$}}} &{\symb{{$>$}}} \\
|
|
||||||
{\symb{@}} &{\symb{!}} &{\symb{*}} \\
|
|
||||||
{\symb{$\backslash$}} &{\symb{{$=$}{$>$}}} &{\symb{{$+$}{$+$}}} \\
|
|
||||||
{\symb{{$+$}}} &{\symb{\_}} &{\symb{\$}} \\
|
|
||||||
{\symb{/}} &{\symb{{$-$}}} & \\
|
|
||||||
\end{tabular}\\
|
|
||||||
|
|
||||||
\subsection*{Comments}
|
|
||||||
Single-line comments begin with {\symb{{$-$}{$-$}}}. \\Multiple-line comments are enclosed with {\symb{\{{$-$}}} and {\symb{{$-$}\}}}.
|
|
||||||
|
|
||||||
\section*{The syntactic structure of GF}
|
|
||||||
Non-terminals are enclosed between $\langle$ and $\rangle$.
|
|
||||||
The symbols {\arrow} (production), {\delimit} (union)
|
|
||||||
and {\emptyP} (empty rule) belong to the BNF notation.
|
|
||||||
All other symbols are terminals.\\
|
|
||||||
|
|
||||||
\begin{tabular}{lll}
|
|
||||||
{\nonterminal{Grammar}} & {\arrow} &{\nonterminal{ListModDef}} \\
|
|
||||||
\end{tabular}\\
|
|
||||||
|
|
||||||
\begin{tabular}{lll}
|
|
||||||
{\nonterminal{ListModDef}} & {\arrow} &{\emptyP} \\
|
|
||||||
& {\delimit} &{\nonterminal{ModDef}} {\nonterminal{ListModDef}} \\
|
|
||||||
\end{tabular}\\
|
|
||||||
|
|
||||||
\begin{tabular}{lll}
|
|
||||||
{\nonterminal{ModDef}} & {\arrow} &{\nonterminal{ModDef}} {\terminal{;}} \\
|
|
||||||
& {\delimit} &{\terminal{grammar}} {\nonterminal{Ident}} {\terminal{{$=$}}} {\terminal{\{}} {\terminal{abstract}} {\terminal{{$=$}}} {\nonterminal{Ident}} {\terminal{;}} {\nonterminal{ListConcSpec}} {\terminal{\}}} \\
|
|
||||||
& {\delimit} &{\nonterminal{ComplMod}} {\nonterminal{ModType}} {\terminal{{$=$}}} {\nonterminal{ModBody}} \\
|
|
||||||
\end{tabular}\\
|
|
||||||
|
|
||||||
\begin{tabular}{lll}
|
|
||||||
{\nonterminal{ConcSpec}} & {\arrow} &{\nonterminal{Ident}} {\terminal{{$=$}}} {\nonterminal{ConcExp}} \\
|
|
||||||
\end{tabular}\\
|
|
||||||
|
|
||||||
\begin{tabular}{lll}
|
|
||||||
{\nonterminal{ListConcSpec}} & {\arrow} &{\emptyP} \\
|
|
||||||
& {\delimit} &{\nonterminal{ConcSpec}} \\
|
|
||||||
& {\delimit} &{\nonterminal{ConcSpec}} {\terminal{;}} {\nonterminal{ListConcSpec}} \\
|
|
||||||
\end{tabular}\\
|
|
||||||
|
|
||||||
\begin{tabular}{lll}
|
|
||||||
{\nonterminal{ConcExp}} & {\arrow} &{\nonterminal{Ident}} {\nonterminal{ListTransfer}} \\
|
|
||||||
\end{tabular}\\
|
|
||||||
|
|
||||||
\begin{tabular}{lll}
|
|
||||||
{\nonterminal{ListTransfer}} & {\arrow} &{\emptyP} \\
|
|
||||||
& {\delimit} &{\nonterminal{Transfer}} {\nonterminal{ListTransfer}} \\
|
|
||||||
\end{tabular}\\
|
|
||||||
|
|
||||||
\begin{tabular}{lll}
|
|
||||||
{\nonterminal{Transfer}} & {\arrow} &{\terminal{(}} {\terminal{transfer}} {\terminal{in}} {\nonterminal{Open}} {\terminal{)}} \\
|
|
||||||
& {\delimit} &{\terminal{(}} {\terminal{transfer}} {\terminal{out}} {\nonterminal{Open}} {\terminal{)}} \\
|
|
||||||
\end{tabular}\\
|
|
||||||
|
|
||||||
\begin{tabular}{lll}
|
|
||||||
{\nonterminal{ModType}} & {\arrow} &{\terminal{abstract}} {\nonterminal{Ident}} \\
|
|
||||||
& {\delimit} &{\terminal{resource}} {\nonterminal{Ident}} \\
|
|
||||||
& {\delimit} &{\terminal{interface}} {\nonterminal{Ident}} \\
|
|
||||||
& {\delimit} &{\terminal{concrete}} {\nonterminal{Ident}} {\terminal{of}} {\nonterminal{Ident}} \\
|
|
||||||
& {\delimit} &{\terminal{instance}} {\nonterminal{Ident}} {\terminal{of}} {\nonterminal{Ident}} \\
|
|
||||||
& {\delimit} &{\terminal{transfer}} {\nonterminal{Ident}} {\terminal{:}} {\nonterminal{Open}} {\terminal{{$-$}{$>$}}} {\nonterminal{Open}} \\
|
|
||||||
\end{tabular}\\
|
|
||||||
|
|
||||||
\begin{tabular}{lll}
|
|
||||||
{\nonterminal{ModBody}} & {\arrow} &{\nonterminal{Extend}} {\nonterminal{Opens}} {\terminal{\{}} {\nonterminal{ListTopDef}} {\terminal{\}}} \\
|
|
||||||
& {\delimit} &{\nonterminal{Ident}} {\terminal{with}} {\nonterminal{ListOpen}} \\
|
|
||||||
& {\delimit} &{\nonterminal{ListIdent}} {\terminal{**}} {\nonterminal{Ident}} {\terminal{with}} {\nonterminal{ListOpen}} \\
|
|
||||||
& {\delimit} &{\terminal{reuse}} {\nonterminal{Ident}} \\
|
|
||||||
& {\delimit} &{\terminal{union}} {\nonterminal{ListIncluded}} \\
|
|
||||||
\end{tabular}\\
|
|
||||||
|
|
||||||
\begin{tabular}{lll}
|
|
||||||
{\nonterminal{ListTopDef}} & {\arrow} &{\emptyP} \\
|
|
||||||
& {\delimit} &{\nonterminal{TopDef}} {\nonterminal{ListTopDef}} \\
|
|
||||||
\end{tabular}\\
|
|
||||||
|
|
||||||
\begin{tabular}{lll}
|
|
||||||
{\nonterminal{Extend}} & {\arrow} &{\nonterminal{ListIdent}} {\terminal{**}} \\
|
|
||||||
& {\delimit} &{\emptyP} \\
|
|
||||||
\end{tabular}\\
|
|
||||||
|
|
||||||
\begin{tabular}{lll}
|
|
||||||
{\nonterminal{ListOpen}} & {\arrow} &{\emptyP} \\
|
|
||||||
& {\delimit} &{\nonterminal{Open}} \\
|
|
||||||
& {\delimit} &{\nonterminal{Open}} {\terminal{,}} {\nonterminal{ListOpen}} \\
|
|
||||||
\end{tabular}\\
|
|
||||||
|
|
||||||
\begin{tabular}{lll}
|
|
||||||
{\nonterminal{Opens}} & {\arrow} &{\emptyP} \\
|
|
||||||
& {\delimit} &{\terminal{open}} {\nonterminal{ListOpen}} {\terminal{in}} \\
|
|
||||||
\end{tabular}\\
|
|
||||||
|
|
||||||
\begin{tabular}{lll}
|
|
||||||
{\nonterminal{Open}} & {\arrow} &{\nonterminal{Ident}} \\
|
|
||||||
& {\delimit} &{\terminal{(}} {\nonterminal{QualOpen}} {\nonterminal{Ident}} {\terminal{)}} \\
|
|
||||||
& {\delimit} &{\terminal{(}} {\nonterminal{QualOpen}} {\nonterminal{Ident}} {\terminal{{$=$}}} {\nonterminal{Ident}} {\terminal{)}} \\
|
|
||||||
\end{tabular}\\
|
|
||||||
|
|
||||||
\begin{tabular}{lll}
|
|
||||||
{\nonterminal{ComplMod}} & {\arrow} &{\emptyP} \\
|
|
||||||
& {\delimit} &{\terminal{incomplete}} \\
|
|
||||||
\end{tabular}\\
|
|
||||||
|
|
||||||
\begin{tabular}{lll}
|
|
||||||
{\nonterminal{QualOpen}} & {\arrow} &{\emptyP} \\
|
|
||||||
& {\delimit} &{\terminal{incomplete}} \\
|
|
||||||
& {\delimit} &{\terminal{interface}} \\
|
|
||||||
\end{tabular}\\
|
|
||||||
|
|
||||||
\begin{tabular}{lll}
|
|
||||||
{\nonterminal{ListIncluded}} & {\arrow} &{\emptyP} \\
|
|
||||||
& {\delimit} &{\nonterminal{Included}} \\
|
|
||||||
& {\delimit} &{\nonterminal{Included}} {\terminal{,}} {\nonterminal{ListIncluded}} \\
|
|
||||||
\end{tabular}\\
|
|
||||||
|
|
||||||
\begin{tabular}{lll}
|
|
||||||
{\nonterminal{Included}} & {\arrow} &{\nonterminal{Ident}} \\
|
|
||||||
& {\delimit} &{\nonterminal{Ident}} {\terminal{[}} {\nonterminal{ListIdent}} {\terminal{]}} \\
|
|
||||||
\end{tabular}\\
|
|
||||||
|
|
||||||
\begin{tabular}{lll}
|
|
||||||
{\nonterminal{Def}} & {\arrow} &{\nonterminal{ListName}} {\terminal{:}} {\nonterminal{Exp}} \\
|
|
||||||
& {\delimit} &{\nonterminal{ListName}} {\terminal{{$=$}}} {\nonterminal{Exp}} \\
|
|
||||||
& {\delimit} &{\nonterminal{Name}} {\nonterminal{ListPatt}} {\terminal{{$=$}}} {\nonterminal{Exp}} \\
|
|
||||||
& {\delimit} &{\nonterminal{ListName}} {\terminal{:}} {\nonterminal{Exp}} {\terminal{{$=$}}} {\nonterminal{Exp}} \\
|
|
||||||
\end{tabular}\\
|
|
||||||
|
|
||||||
\begin{tabular}{lll}
|
|
||||||
{\nonterminal{TopDef}} & {\arrow} &{\terminal{cat}} {\nonterminal{ListCatDef}} \\
|
|
||||||
& {\delimit} &{\terminal{fun}} {\nonterminal{ListFunDef}} \\
|
|
||||||
& {\delimit} &{\terminal{data}} {\nonterminal{ListFunDef}} \\
|
|
||||||
& {\delimit} &{\terminal{def}} {\nonterminal{ListDef}} \\
|
|
||||||
& {\delimit} &{\terminal{data}} {\nonterminal{ListDataDef}} \\
|
|
||||||
& {\delimit} &{\terminal{transfer}} {\nonterminal{ListDef}} \\
|
|
||||||
& {\delimit} &{\terminal{param}} {\nonterminal{ListParDef}} \\
|
|
||||||
& {\delimit} &{\terminal{oper}} {\nonterminal{ListDef}} \\
|
|
||||||
& {\delimit} &{\terminal{lincat}} {\nonterminal{ListPrintDef}} \\
|
|
||||||
& {\delimit} &{\terminal{lindef}} {\nonterminal{ListDef}} \\
|
|
||||||
& {\delimit} &{\terminal{lin}} {\nonterminal{ListDef}} \\
|
|
||||||
& {\delimit} &{\terminal{printname}} {\terminal{cat}} {\nonterminal{ListPrintDef}} \\
|
|
||||||
& {\delimit} &{\terminal{printname}} {\terminal{fun}} {\nonterminal{ListPrintDef}} \\
|
|
||||||
& {\delimit} &{\terminal{flags}} {\nonterminal{ListFlagDef}} \\
|
|
||||||
& {\delimit} &{\terminal{printname}} {\nonterminal{ListPrintDef}} \\
|
|
||||||
& {\delimit} &{\terminal{lintype}} {\nonterminal{ListDef}} \\
|
|
||||||
& {\delimit} &{\terminal{pattern}} {\nonterminal{ListDef}} \\
|
|
||||||
& {\delimit} &{\terminal{package}} {\nonterminal{Ident}} {\terminal{{$=$}}} {\terminal{\{}} {\nonterminal{ListTopDef}} {\terminal{\}}} {\terminal{;}} \\
|
|
||||||
& {\delimit} &{\terminal{var}} {\nonterminal{ListDef}} \\
|
|
||||||
& {\delimit} &{\terminal{tokenizer}} {\nonterminal{Ident}} {\terminal{;}} \\
|
|
||||||
\end{tabular}\\
|
|
||||||
|
|
||||||
\begin{tabular}{lll}
|
|
||||||
{\nonterminal{CatDef}} & {\arrow} &{\nonterminal{Ident}} {\nonterminal{ListDDecl}} \\
|
|
||||||
& {\delimit} &{\terminal{[}} {\nonterminal{Ident}} {\nonterminal{ListDDecl}} {\terminal{]}} \\
|
|
||||||
& {\delimit} &{\terminal{[}} {\nonterminal{Ident}} {\nonterminal{ListDDecl}} {\terminal{]}} {\terminal{\{}} {\nonterminal{Integer}} {\terminal{\}}} \\
|
|
||||||
\end{tabular}\\
|
|
||||||
|
|
||||||
\begin{tabular}{lll}
|
|
||||||
{\nonterminal{FunDef}} & {\arrow} &{\nonterminal{ListIdent}} {\terminal{:}} {\nonterminal{Exp}} \\
|
|
||||||
\end{tabular}\\
|
|
||||||
|
|
||||||
\begin{tabular}{lll}
|
|
||||||
{\nonterminal{DataDef}} & {\arrow} &{\nonterminal{Ident}} {\terminal{{$=$}}} {\nonterminal{ListDataConstr}} \\
|
|
||||||
\end{tabular}\\
|
|
||||||
|
|
||||||
\begin{tabular}{lll}
|
|
||||||
{\nonterminal{DataConstr}} & {\arrow} &{\nonterminal{Ident}} \\
|
|
||||||
& {\delimit} &{\nonterminal{Ident}} {\terminal{.}} {\nonterminal{Ident}} \\
|
|
||||||
\end{tabular}\\
|
|
||||||
|
|
||||||
\begin{tabular}{lll}
|
|
||||||
{\nonterminal{ListDataConstr}} & {\arrow} &{\emptyP} \\
|
|
||||||
& {\delimit} &{\nonterminal{DataConstr}} \\
|
|
||||||
& {\delimit} &{\nonterminal{DataConstr}} {\terminal{{$|$}}} {\nonterminal{ListDataConstr}} \\
|
|
||||||
\end{tabular}\\
|
|
||||||
|
|
||||||
\begin{tabular}{lll}
|
|
||||||
{\nonterminal{ParDef}} & {\arrow} &{\nonterminal{Ident}} {\terminal{{$=$}}} {\nonterminal{ListParConstr}} \\
|
|
||||||
& {\delimit} &{\nonterminal{Ident}} {\terminal{{$=$}}} {\terminal{(}} {\terminal{in}} {\nonterminal{Ident}} {\terminal{)}} \\
|
|
||||||
& {\delimit} &{\nonterminal{Ident}} \\
|
|
||||||
\end{tabular}\\
|
|
||||||
|
|
||||||
\begin{tabular}{lll}
|
|
||||||
{\nonterminal{ParConstr}} & {\arrow} &{\nonterminal{Ident}} {\nonterminal{ListDDecl}} \\
|
|
||||||
\end{tabular}\\
|
|
||||||
|
|
||||||
\begin{tabular}{lll}
|
|
||||||
{\nonterminal{PrintDef}} & {\arrow} &{\nonterminal{ListName}} {\terminal{{$=$}}} {\nonterminal{Exp}} \\
|
|
||||||
\end{tabular}\\
|
|
||||||
|
|
||||||
\begin{tabular}{lll}
|
|
||||||
{\nonterminal{FlagDef}} & {\arrow} &{\nonterminal{Ident}} {\terminal{{$=$}}} {\nonterminal{Ident}} \\
|
|
||||||
\end{tabular}\\
|
|
||||||
|
|
||||||
\begin{tabular}{lll}
|
|
||||||
{\nonterminal{ListDef}} & {\arrow} &{\nonterminal{Def}} {\terminal{;}} \\
|
|
||||||
& {\delimit} &{\nonterminal{Def}} {\terminal{;}} {\nonterminal{ListDef}} \\
|
|
||||||
\end{tabular}\\
|
|
||||||
|
|
||||||
\begin{tabular}{lll}
|
|
||||||
{\nonterminal{ListCatDef}} & {\arrow} &{\nonterminal{CatDef}} {\terminal{;}} \\
|
|
||||||
& {\delimit} &{\nonterminal{CatDef}} {\terminal{;}} {\nonterminal{ListCatDef}} \\
|
|
||||||
\end{tabular}\\
|
|
||||||
|
|
||||||
\begin{tabular}{lll}
|
|
||||||
{\nonterminal{ListFunDef}} & {\arrow} &{\nonterminal{FunDef}} {\terminal{;}} \\
|
|
||||||
& {\delimit} &{\nonterminal{FunDef}} {\terminal{;}} {\nonterminal{ListFunDef}} \\
|
|
||||||
\end{tabular}\\
|
|
||||||
|
|
||||||
\begin{tabular}{lll}
|
|
||||||
{\nonterminal{ListDataDef}} & {\arrow} &{\nonterminal{DataDef}} {\terminal{;}} \\
|
|
||||||
& {\delimit} &{\nonterminal{DataDef}} {\terminal{;}} {\nonterminal{ListDataDef}} \\
|
|
||||||
\end{tabular}\\
|
|
||||||
|
|
||||||
\begin{tabular}{lll}
|
|
||||||
{\nonterminal{ListParDef}} & {\arrow} &{\nonterminal{ParDef}} {\terminal{;}} \\
|
|
||||||
& {\delimit} &{\nonterminal{ParDef}} {\terminal{;}} {\nonterminal{ListParDef}} \\
|
|
||||||
\end{tabular}\\
|
|
||||||
|
|
||||||
\begin{tabular}{lll}
|
|
||||||
{\nonterminal{ListPrintDef}} & {\arrow} &{\nonterminal{PrintDef}} {\terminal{;}} \\
|
|
||||||
& {\delimit} &{\nonterminal{PrintDef}} {\terminal{;}} {\nonterminal{ListPrintDef}} \\
|
|
||||||
\end{tabular}\\
|
|
||||||
|
|
||||||
\begin{tabular}{lll}
|
|
||||||
{\nonterminal{ListFlagDef}} & {\arrow} &{\nonterminal{FlagDef}} {\terminal{;}} \\
|
|
||||||
& {\delimit} &{\nonterminal{FlagDef}} {\terminal{;}} {\nonterminal{ListFlagDef}} \\
|
|
||||||
\end{tabular}\\
|
|
||||||
|
|
||||||
\begin{tabular}{lll}
|
|
||||||
{\nonterminal{ListParConstr}} & {\arrow} &{\emptyP} \\
|
|
||||||
& {\delimit} &{\nonterminal{ParConstr}} \\
|
|
||||||
& {\delimit} &{\nonterminal{ParConstr}} {\terminal{{$|$}}} {\nonterminal{ListParConstr}} \\
|
|
||||||
\end{tabular}\\
|
|
||||||
|
|
||||||
\begin{tabular}{lll}
|
|
||||||
{\nonterminal{ListIdent}} & {\arrow} &{\nonterminal{Ident}} \\
|
|
||||||
& {\delimit} &{\nonterminal{Ident}} {\terminal{,}} {\nonterminal{ListIdent}} \\
|
|
||||||
\end{tabular}\\
|
|
||||||
|
|
||||||
\begin{tabular}{lll}
|
|
||||||
{\nonterminal{Name}} & {\arrow} &{\nonterminal{Ident}} \\
|
|
||||||
& {\delimit} &{\terminal{[}} {\nonterminal{Ident}} {\terminal{]}} \\
|
|
||||||
\end{tabular}\\
|
|
||||||
|
|
||||||
\begin{tabular}{lll}
|
|
||||||
{\nonterminal{ListName}} & {\arrow} &{\nonterminal{Name}} \\
|
|
||||||
& {\delimit} &{\nonterminal{Name}} {\terminal{,}} {\nonterminal{ListName}} \\
|
|
||||||
\end{tabular}\\
|
|
||||||
|
|
||||||
\begin{tabular}{lll}
|
|
||||||
{\nonterminal{LocDef}} & {\arrow} &{\nonterminal{ListIdent}} {\terminal{:}} {\nonterminal{Exp}} \\
|
|
||||||
& {\delimit} &{\nonterminal{ListIdent}} {\terminal{{$=$}}} {\nonterminal{Exp}} \\
|
|
||||||
& {\delimit} &{\nonterminal{ListIdent}} {\terminal{:}} {\nonterminal{Exp}} {\terminal{{$=$}}} {\nonterminal{Exp}} \\
|
|
||||||
\end{tabular}\\
|
|
||||||
|
|
||||||
\begin{tabular}{lll}
|
|
||||||
{\nonterminal{ListLocDef}} & {\arrow} &{\emptyP} \\
|
|
||||||
& {\delimit} &{\nonterminal{LocDef}} \\
|
|
||||||
& {\delimit} &{\nonterminal{LocDef}} {\terminal{;}} {\nonterminal{ListLocDef}} \\
|
|
||||||
\end{tabular}\\
|
|
||||||
|
|
||||||
\begin{tabular}{lll}
|
|
||||||
{\nonterminal{Exp4}} & {\arrow} &{\nonterminal{Ident}} \\
|
|
||||||
& {\delimit} &{\terminal{\{}} {\nonterminal{Ident}} {\terminal{\}}} \\
|
|
||||||
& {\delimit} &{\terminal{\%}} {\nonterminal{Ident}} {\terminal{\%}} \\
|
|
||||||
& {\delimit} &{\nonterminal{Sort}} \\
|
|
||||||
& {\delimit} &{\nonterminal{String}} \\
|
|
||||||
& {\delimit} &{\nonterminal{Integer}} \\
|
|
||||||
& {\delimit} &{\terminal{?}} \\
|
|
||||||
& {\delimit} &{\terminal{[}} {\terminal{]}} \\
|
|
||||||
& {\delimit} &{\terminal{data}} \\
|
|
||||||
& {\delimit} &{\terminal{[}} {\nonterminal{Ident}} {\nonterminal{Exps}} {\terminal{]}} \\
|
|
||||||
& {\delimit} &{\terminal{[}} {\nonterminal{String}} {\terminal{]}} \\
|
|
||||||
& {\delimit} &{\terminal{\{}} {\nonterminal{ListLocDef}} {\terminal{\}}} \\
|
|
||||||
& {\delimit} &{\terminal{{$<$}}} {\nonterminal{ListTupleComp}} {\terminal{{$>$}}} \\
|
|
||||||
& {\delimit} &{\terminal{(}} {\terminal{in}} {\nonterminal{Ident}} {\terminal{)}} \\
|
|
||||||
& {\delimit} &{\terminal{{$<$}}} {\nonterminal{Exp}} {\terminal{:}} {\nonterminal{Exp}} {\terminal{{$>$}}} \\
|
|
||||||
& {\delimit} &{\terminal{(}} {\nonterminal{Exp}} {\terminal{)}} \\
|
|
||||||
& {\delimit} &{\nonterminal{LString}} \\
|
|
||||||
\end{tabular}\\
|
|
||||||
|
|
||||||
\begin{tabular}{lll}
|
|
||||||
{\nonterminal{Exp3}} & {\arrow} &{\nonterminal{Exp3}} {\terminal{.}} {\nonterminal{Label}} \\
|
|
||||||
& {\delimit} &{\terminal{\{}} {\nonterminal{Ident}} {\terminal{.}} {\nonterminal{Ident}} {\terminal{\}}} \\
|
|
||||||
& {\delimit} &{\terminal{\%}} {\nonterminal{Ident}} {\terminal{.}} {\nonterminal{Ident}} {\terminal{\%}} \\
|
|
||||||
& {\delimit} &{\nonterminal{Exp4}} \\
|
|
||||||
\end{tabular}\\
|
|
||||||
|
|
||||||
\begin{tabular}{lll}
|
|
||||||
{\nonterminal{Exp2}} & {\arrow} &{\nonterminal{Exp2}} {\nonterminal{Exp3}} \\
|
|
||||||
& {\delimit} &{\terminal{table}} {\terminal{\{}} {\nonterminal{ListCase}} {\terminal{\}}} \\
|
|
||||||
& {\delimit} &{\terminal{table}} {\nonterminal{Exp4}} {\terminal{\{}} {\nonterminal{ListCase}} {\terminal{\}}} \\
|
|
||||||
& {\delimit} &{\terminal{table}} {\nonterminal{Exp4}} {\terminal{[}} {\nonterminal{ListExp}} {\terminal{]}} \\
|
|
||||||
& {\delimit} &{\terminal{case}} {\nonterminal{Exp}} {\terminal{of}} {\terminal{\{}} {\nonterminal{ListCase}} {\terminal{\}}} \\
|
|
||||||
& {\delimit} &{\terminal{variants}} {\terminal{\{}} {\nonterminal{ListExp}} {\terminal{\}}} \\
|
|
||||||
& {\delimit} &{\terminal{pre}} {\terminal{\{}} {\nonterminal{Exp}} {\terminal{;}} {\nonterminal{ListAltern}} {\terminal{\}}} \\
|
|
||||||
& {\delimit} &{\terminal{strs}} {\terminal{\{}} {\nonterminal{ListExp}} {\terminal{\}}} \\
|
|
||||||
& {\delimit} &{\nonterminal{Ident}} {\terminal{@}} {\nonterminal{Exp4}} \\
|
|
||||||
& {\delimit} &{\nonterminal{Exp3}} \\
|
|
||||||
& {\delimit} &{\terminal{Lin}} {\nonterminal{Ident}} \\
|
|
||||||
\end{tabular}\\
|
|
||||||
|
|
||||||
\begin{tabular}{lll}
|
|
||||||
{\nonterminal{Exp1}} & {\arrow} &{\nonterminal{Exp1}} {\terminal{!}} {\nonterminal{Exp2}} \\
|
|
||||||
& {\delimit} &{\nonterminal{Exp1}} {\terminal{*}} {\nonterminal{Exp2}} \\
|
|
||||||
& {\delimit} &{\nonterminal{Exp1}} {\terminal{**}} {\nonterminal{Exp2}} \\
|
|
||||||
& {\delimit} &{\nonterminal{Exp2}} \\
|
|
||||||
\end{tabular}\\
|
|
||||||
|
|
||||||
\begin{tabular}{lll}
|
|
||||||
{\nonterminal{Exp}} & {\arrow} &{\terminal{$\backslash$}} {\nonterminal{ListBind}} {\terminal{{$-$}{$>$}}} {\nonterminal{Exp}} \\
|
|
||||||
& {\delimit} &{\terminal{$\backslash$}} {\terminal{$\backslash$}} {\nonterminal{ListBind}} {\terminal{{$=$}{$>$}}} {\nonterminal{Exp}} \\
|
|
||||||
& {\delimit} &{\nonterminal{Decl}} {\terminal{{$-$}{$>$}}} {\nonterminal{Exp}} \\
|
|
||||||
& {\delimit} &{\nonterminal{Exp1}} {\terminal{{$=$}{$>$}}} {\nonterminal{Exp}} \\
|
|
||||||
& {\delimit} &{\nonterminal{Exp1}} {\terminal{{$+$}{$+$}}} {\nonterminal{Exp}} \\
|
|
||||||
& {\delimit} &{\nonterminal{Exp1}} {\terminal{{$+$}}} {\nonterminal{Exp}} \\
|
|
||||||
& {\delimit} &{\terminal{let}} {\terminal{\{}} {\nonterminal{ListLocDef}} {\terminal{\}}} {\terminal{in}} {\nonterminal{Exp}} \\
|
|
||||||
& {\delimit} &{\terminal{let}} {\nonterminal{ListLocDef}} {\terminal{in}} {\nonterminal{Exp}} \\
|
|
||||||
& {\delimit} &{\nonterminal{Exp1}} {\terminal{where}} {\terminal{\{}} {\nonterminal{ListLocDef}} {\terminal{\}}} \\
|
|
||||||
& {\delimit} &{\terminal{fn}} {\terminal{\{}} {\nonterminal{ListEquation}} {\terminal{\}}} \\
|
|
||||||
& {\delimit} &{\nonterminal{Exp1}} \\
|
|
||||||
\end{tabular}\\
|
|
||||||
|
|
||||||
\begin{tabular}{lll}
|
|
||||||
{\nonterminal{ListExp}} & {\arrow} &{\emptyP} \\
|
|
||||||
& {\delimit} &{\nonterminal{Exp}} \\
|
|
||||||
& {\delimit} &{\nonterminal{Exp}} {\terminal{;}} {\nonterminal{ListExp}} \\
|
|
||||||
\end{tabular}\\
|
|
||||||
|
|
||||||
\begin{tabular}{lll}
|
|
||||||
{\nonterminal{Exps}} & {\arrow} &{\emptyP} \\
|
|
||||||
& {\delimit} &{\nonterminal{Exp4}} {\nonterminal{Exps}} \\
|
|
||||||
\end{tabular}\\
|
|
||||||
|
|
||||||
\begin{tabular}{lll}
|
|
||||||
{\nonterminal{Patt1}} & {\arrow} &{\terminal{\_}} \\
|
|
||||||
& {\delimit} &{\nonterminal{Ident}} \\
|
|
||||||
& {\delimit} &{\terminal{\{}} {\nonterminal{Ident}} {\terminal{\}}} \\
|
|
||||||
& {\delimit} &{\nonterminal{Ident}} {\terminal{.}} {\nonterminal{Ident}} \\
|
|
||||||
& {\delimit} &{\nonterminal{Integer}} \\
|
|
||||||
& {\delimit} &{\nonterminal{String}} \\
|
|
||||||
& {\delimit} &{\terminal{\{}} {\nonterminal{ListPattAss}} {\terminal{\}}} \\
|
|
||||||
& {\delimit} &{\terminal{{$<$}}} {\nonterminal{ListPattTupleComp}} {\terminal{{$>$}}} \\
|
|
||||||
& {\delimit} &{\terminal{(}} {\nonterminal{Patt}} {\terminal{)}} \\
|
|
||||||
\end{tabular}\\
|
|
||||||
|
|
||||||
\begin{tabular}{lll}
|
|
||||||
{\nonterminal{Patt}} & {\arrow} &{\nonterminal{Ident}} {\nonterminal{ListPatt}} \\
|
|
||||||
& {\delimit} &{\nonterminal{Ident}} {\terminal{.}} {\nonterminal{Ident}} {\nonterminal{ListPatt}} \\
|
|
||||||
& {\delimit} &{\nonterminal{Patt1}} \\
|
|
||||||
\end{tabular}\\
|
|
||||||
|
|
||||||
\begin{tabular}{lll}
|
|
||||||
{\nonterminal{PattAss}} & {\arrow} &{\nonterminal{ListIdent}} {\terminal{{$=$}}} {\nonterminal{Patt}} \\
|
|
||||||
\end{tabular}\\
|
|
||||||
|
|
||||||
\begin{tabular}{lll}
|
|
||||||
{\nonterminal{Label}} & {\arrow} &{\nonterminal{Ident}} \\
|
|
||||||
& {\delimit} &{\terminal{\$}} {\nonterminal{Integer}} \\
|
|
||||||
\end{tabular}\\
|
|
||||||
|
|
||||||
\begin{tabular}{lll}
|
|
||||||
{\nonterminal{Sort}} & {\arrow} &{\terminal{Type}} \\
|
|
||||||
& {\delimit} &{\terminal{PType}} \\
|
|
||||||
& {\delimit} &{\terminal{Tok}} \\
|
|
||||||
& {\delimit} &{\terminal{Str}} \\
|
|
||||||
& {\delimit} &{\terminal{Strs}} \\
|
|
||||||
\end{tabular}\\
|
|
||||||
|
|
||||||
\begin{tabular}{lll}
|
|
||||||
{\nonterminal{ListPattAss}} & {\arrow} &{\emptyP} \\
|
|
||||||
& {\delimit} &{\nonterminal{PattAss}} \\
|
|
||||||
& {\delimit} &{\nonterminal{PattAss}} {\terminal{;}} {\nonterminal{ListPattAss}} \\
|
|
||||||
\end{tabular}\\
|
|
||||||
|
|
||||||
\begin{tabular}{lll}
|
|
||||||
{\nonterminal{PattAlt}} & {\arrow} &{\nonterminal{Patt}} \\
|
|
||||||
\end{tabular}\\
|
|
||||||
|
|
||||||
\begin{tabular}{lll}
|
|
||||||
{\nonterminal{ListPatt}} & {\arrow} &{\nonterminal{Patt1}} \\
|
|
||||||
& {\delimit} &{\nonterminal{Patt1}} {\nonterminal{ListPatt}} \\
|
|
||||||
\end{tabular}\\
|
|
||||||
|
|
||||||
\begin{tabular}{lll}
|
|
||||||
{\nonterminal{ListPattAlt}} & {\arrow} &{\nonterminal{PattAlt}} \\
|
|
||||||
& {\delimit} &{\nonterminal{PattAlt}} {\terminal{{$|$}}} {\nonterminal{ListPattAlt}} \\
|
|
||||||
\end{tabular}\\
|
|
||||||
|
|
||||||
\begin{tabular}{lll}
|
|
||||||
{\nonterminal{Bind}} & {\arrow} &{\nonterminal{Ident}} \\
|
|
||||||
& {\delimit} &{\terminal{\_}} \\
|
|
||||||
\end{tabular}\\
|
|
||||||
|
|
||||||
\begin{tabular}{lll}
|
|
||||||
{\nonterminal{ListBind}} & {\arrow} &{\emptyP} \\
|
|
||||||
& {\delimit} &{\nonterminal{Bind}} \\
|
|
||||||
& {\delimit} &{\nonterminal{Bind}} {\terminal{,}} {\nonterminal{ListBind}} \\
|
|
||||||
\end{tabular}\\
|
|
||||||
|
|
||||||
\begin{tabular}{lll}
|
|
||||||
{\nonterminal{Decl}} & {\arrow} &{\terminal{(}} {\nonterminal{ListBind}} {\terminal{:}} {\nonterminal{Exp}} {\terminal{)}} \\
|
|
||||||
& {\delimit} &{\nonterminal{Exp2}} \\
|
|
||||||
\end{tabular}\\
|
|
||||||
|
|
||||||
\begin{tabular}{lll}
|
|
||||||
{\nonterminal{TupleComp}} & {\arrow} &{\nonterminal{Exp}} \\
|
|
||||||
\end{tabular}\\
|
|
||||||
|
|
||||||
\begin{tabular}{lll}
|
|
||||||
{\nonterminal{PattTupleComp}} & {\arrow} &{\nonterminal{Patt}} \\
|
|
||||||
\end{tabular}\\
|
|
||||||
|
|
||||||
\begin{tabular}{lll}
|
|
||||||
{\nonterminal{ListTupleComp}} & {\arrow} &{\emptyP} \\
|
|
||||||
& {\delimit} &{\nonterminal{TupleComp}} \\
|
|
||||||
& {\delimit} &{\nonterminal{TupleComp}} {\terminal{,}} {\nonterminal{ListTupleComp}} \\
|
|
||||||
\end{tabular}\\
|
|
||||||
|
|
||||||
\begin{tabular}{lll}
|
|
||||||
{\nonterminal{ListPattTupleComp}} & {\arrow} &{\emptyP} \\
|
|
||||||
& {\delimit} &{\nonterminal{PattTupleComp}} \\
|
|
||||||
& {\delimit} &{\nonterminal{PattTupleComp}} {\terminal{,}} {\nonterminal{ListPattTupleComp}} \\
|
|
||||||
\end{tabular}\\
|
|
||||||
|
|
||||||
\begin{tabular}{lll}
|
|
||||||
{\nonterminal{Case}} & {\arrow} &{\nonterminal{ListPattAlt}} {\terminal{{$=$}{$>$}}} {\nonterminal{Exp}} \\
|
|
||||||
\end{tabular}\\
|
|
||||||
|
|
||||||
\begin{tabular}{lll}
|
|
||||||
{\nonterminal{ListCase}} & {\arrow} &{\nonterminal{Case}} \\
|
|
||||||
& {\delimit} &{\nonterminal{Case}} {\terminal{;}} {\nonterminal{ListCase}} \\
|
|
||||||
\end{tabular}\\
|
|
||||||
|
|
||||||
\begin{tabular}{lll}
|
|
||||||
{\nonterminal{Equation}} & {\arrow} &{\nonterminal{ListPatt}} {\terminal{{$-$}{$>$}}} {\nonterminal{Exp}} \\
|
|
||||||
\end{tabular}\\
|
|
||||||
|
|
||||||
\begin{tabular}{lll}
|
|
||||||
{\nonterminal{ListEquation}} & {\arrow} &{\emptyP} \\
|
|
||||||
& {\delimit} &{\nonterminal{Equation}} \\
|
|
||||||
& {\delimit} &{\nonterminal{Equation}} {\terminal{;}} {\nonterminal{ListEquation}} \\
|
|
||||||
\end{tabular}\\
|
|
||||||
|
|
||||||
\begin{tabular}{lll}
|
|
||||||
{\nonterminal{Altern}} & {\arrow} &{\nonterminal{Exp}} {\terminal{/}} {\nonterminal{Exp}} \\
|
|
||||||
\end{tabular}\\
|
|
||||||
|
|
||||||
\begin{tabular}{lll}
|
|
||||||
{\nonterminal{ListAltern}} & {\arrow} &{\emptyP} \\
|
|
||||||
& {\delimit} &{\nonterminal{Altern}} \\
|
|
||||||
& {\delimit} &{\nonterminal{Altern}} {\terminal{;}} {\nonterminal{ListAltern}} \\
|
|
||||||
\end{tabular}\\
|
|
||||||
|
|
||||||
\begin{tabular}{lll}
|
|
||||||
{\nonterminal{DDecl}} & {\arrow} &{\terminal{(}} {\nonterminal{ListBind}} {\terminal{:}} {\nonterminal{Exp}} {\terminal{)}} \\
|
|
||||||
& {\delimit} &{\nonterminal{Exp4}} \\
|
|
||||||
\end{tabular}\\
|
|
||||||
|
|
||||||
\begin{tabular}{lll}
|
|
||||||
{\nonterminal{ListDDecl}} & {\arrow} &{\emptyP} \\
|
|
||||||
& {\delimit} &{\nonterminal{DDecl}} {\nonterminal{ListDDecl}} \\
|
|
||||||
\end{tabular}\\
|
|
||||||
|
|
||||||
\begin{tabular}{lll}
|
|
||||||
{\nonterminal{OldGrammar}} & {\arrow} &{\nonterminal{Include}} {\nonterminal{ListTopDef}} \\
|
|
||||||
\end{tabular}\\
|
|
||||||
|
|
||||||
\begin{tabular}{lll}
|
|
||||||
{\nonterminal{Include}} & {\arrow} &{\emptyP} \\
|
|
||||||
& {\delimit} &{\terminal{include}} {\nonterminal{ListFileName}} \\
|
|
||||||
\end{tabular}\\
|
|
||||||
|
|
||||||
\begin{tabular}{lll}
|
|
||||||
{\nonterminal{FileName}} & {\arrow} &{\nonterminal{String}} \\
|
|
||||||
& {\delimit} &{\nonterminal{Ident}} \\
|
|
||||||
& {\delimit} &{\terminal{/}} {\nonterminal{FileName}} \\
|
|
||||||
& {\delimit} &{\terminal{.}} {\nonterminal{FileName}} \\
|
|
||||||
& {\delimit} &{\terminal{{$-$}}} {\nonterminal{FileName}} \\
|
|
||||||
& {\delimit} &{\nonterminal{Ident}} {\nonterminal{FileName}} \\
|
|
||||||
\end{tabular}\\
|
|
||||||
|
|
||||||
\begin{tabular}{lll}
|
|
||||||
{\nonterminal{ListFileName}} & {\arrow} &{\nonterminal{FileName}} {\terminal{;}} \\
|
|
||||||
& {\delimit} &{\nonterminal{FileName}} {\terminal{;}} {\nonterminal{ListFileName}} \\
|
|
||||||
\end{tabular}\\
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
\end{document}
|
|
||||||
|
|
||||||
|
Before Width: | Height: | Size: 20 KiB |
@@ -1,75 +0,0 @@
|
|||||||
digraph {
|
|
||||||
|
|
||||||
size = "12,8" ;
|
|
||||||
|
|
||||||
Lang [style = "solid", shape = "ellipse", URL = "Lang.gf"];
|
|
||||||
|
|
||||||
Lang -> Grammar [style = "solid"];
|
|
||||||
Lang -> Lexicon [style = "solid"];
|
|
||||||
|
|
||||||
Grammar [style = "solid", shape = "ellipse", URL = "Lang.gf"];
|
|
||||||
|
|
||||||
|
|
||||||
Grammar -> Noun [style = "solid"];
|
|
||||||
Grammar -> Verb [style = "solid"];
|
|
||||||
Grammar -> Adjective [style = "solid"];
|
|
||||||
Grammar -> Adverb [style = "solid"];
|
|
||||||
Grammar -> Numeral [style = "solid"];
|
|
||||||
Grammar -> Sentence [style = "solid"];
|
|
||||||
Grammar -> Question [style = "solid"];
|
|
||||||
Grammar -> Relative [style = "solid"];
|
|
||||||
Grammar -> Conjunction [style = "solid"];
|
|
||||||
Grammar -> Phrase [style = "solid"];
|
|
||||||
Grammar -> Text [style = "solid"];
|
|
||||||
Grammar -> Idiom [style = "solid"];
|
|
||||||
Grammar -> Structural [style = "solid"];
|
|
||||||
|
|
||||||
|
|
||||||
Noun [style = "solid", shape = "ellipse", URL = "Noun.gf"];
|
|
||||||
Noun -> Cat [style = "solid"];
|
|
||||||
|
|
||||||
Verb [style = "solid", shape = "ellipse", URL = "Verb.gf"];
|
|
||||||
Verb -> Cat [style = "solid"];
|
|
||||||
|
|
||||||
Adjective [style = "solid", shape = "ellipse", URL = "Adjective.gf"];
|
|
||||||
Adjective -> Cat [style = "solid"];
|
|
||||||
|
|
||||||
Adverb [style = "solid", shape = "ellipse", URL = "Adverb.gf"];
|
|
||||||
Adverb -> Cat [style = "solid"];
|
|
||||||
|
|
||||||
Numeral [style = "solid", shape = "ellipse", URL = "Numeral.gf"];
|
|
||||||
Numeral -> Cat [style = "solid"];
|
|
||||||
|
|
||||||
Sentence [style = "solid", shape = "ellipse", URL = "Sentence.gf"];
|
|
||||||
Sentence -> Cat [style = "solid"];
|
|
||||||
|
|
||||||
Question [style = "solid", shape = "ellipse", URL = "Question.gf"];
|
|
||||||
Question -> Cat [style = "solid"];
|
|
||||||
|
|
||||||
Relative [style = "solid", shape = "ellipse", URL = "Relative.gf"];
|
|
||||||
Relative -> Cat [style = "solid"];
|
|
||||||
|
|
||||||
Conjunction [style = "solid", shape = "ellipse", URL = "Conjunction.gf"];
|
|
||||||
Conjunction -> Cat [style = "solid"];
|
|
||||||
|
|
||||||
Phrase [style = "solid", shape = "ellipse", URL = "Phrase.gf"];
|
|
||||||
Phrase -> Cat [style = "solid"];
|
|
||||||
|
|
||||||
Text [style = "solid", shape = "ellipse", URL = "Phrase.gf"];
|
|
||||||
Text -> Cat [style = "solid"];
|
|
||||||
|
|
||||||
Idiom [style = "solid", shape = "ellipse", URL = "Phrase.gf"];
|
|
||||||
Idiom -> Cat [style = "solid"];
|
|
||||||
|
|
||||||
Structural [style = "solid", shape = "ellipse", URL = "Structural.gf"];
|
|
||||||
Structural -> Cat [style = "solid"];
|
|
||||||
|
|
||||||
Lexicon [style = "solid", shape = "ellipse", URL = "Lexicon.gf"];
|
|
||||||
Lexicon -> Cat [style = "solid"];
|
|
||||||
|
|
||||||
Cat [style = "solid", shape = "ellipse", URL = "Cat.gf"];
|
|
||||||
Cat -> Common [style = "solid"];
|
|
||||||
|
|
||||||
Common [style = "solid", shape = "ellipse", URL = "Tense.gf"];
|
|
||||||
|
|
||||||
}
|
|
||||||
|
Before Width: | Height: | Size: 77 KiB |
@@ -1,231 +0,0 @@
|
|||||||
|
|
||||||
* Some notes on the syntax of this file, making it possible to use todoo-mode.el:
|
|
||||||
|
|
||||||
- Items start with "* "
|
|
||||||
- Sub-items start with "- "
|
|
||||||
- Each item should note somewhere who reported it
|
|
||||||
Suggestion: Add "[who]" at the beginning of the item title
|
|
||||||
(then one can use "assign item" in todoo-mode)
|
|
||||||
- Each item should have a priority
|
|
||||||
Suggestion: Add "URGENT", "IMPORTANT" or "WISH" at the beginning of
|
|
||||||
the item title
|
|
||||||
- Sort the items in priority order
|
|
||||||
(todoo-mode can move an item up or down)
|
|
||||||
|
|
||||||
----------------------------------------------------------------------
|
|
||||||
|
|
||||||
|
|
||||||
* [peb] URGENT: Error messages for syntax errors
|
|
||||||
|
|
||||||
When a syntax error is reported, it should be noted which file it
|
|
||||||
is in. Otherwise it is impossible to know where the error is
|
|
||||||
(if one uses the -s flag):
|
|
||||||
|
|
||||||
> i -s Domain/MP3/Domain_MP_Semantics.gf
|
|
||||||
syntax error at line 33 before ve , Proposition ,
|
|
||||||
|
|
||||||
There's no problem with other kinds of errors:
|
|
||||||
|
|
||||||
> i -s Domain/MP3/Domain_MP_Semantics.gf
|
|
||||||
checking module Godis_Semantics
|
|
||||||
Happened in linearization of userMove :
|
|
||||||
product expected instead of {
|
|
||||||
pl : Str
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
* [peb] IMPORTANT: Add the -path of a module to daughter modules
|
|
||||||
|
|
||||||
Then the main module does not have to know where all grandchildren are:
|
|
||||||
|
|
||||||
file A.gf:
|
|
||||||
abstract A = B ** {...}
|
|
||||||
|
|
||||||
file B.gf:
|
|
||||||
--# -path=./resource
|
|
||||||
abstract B = Lang ** {...}
|
|
||||||
|
|
||||||
I.e.: the file A.gf should not need to know that B.gf uses the
|
|
||||||
resource library.
|
|
||||||
|
|
||||||
|
|
||||||
* [peb] IMPORTANT: incomplete concrete and interfaces
|
|
||||||
|
|
||||||
- The following works in GF:
|
|
||||||
|
|
||||||
incomplete concrete TestDI of TestA = open (C=TestCI) in {
|
|
||||||
lincat A = TestCI.A ** {p : Str};
|
|
||||||
lin f = TestCI.f ** {p = "f"};
|
|
||||||
g = TestCI.g ** {p = "g"};
|
|
||||||
}
|
|
||||||
|
|
||||||
> i -src TestDE.gf
|
|
||||||
|
|
||||||
- BUT, if we exchange "TestCI" for "C" we get an error:
|
|
||||||
|
|
||||||
incomplete concrete TestDI of TestA = open (C=TestCI) in {
|
|
||||||
lincat A = C.A ** {p : Str};
|
|
||||||
lin f = C.f ** {p = "f"};
|
|
||||||
g = C.g ** {p = "g"};
|
|
||||||
}
|
|
||||||
|
|
||||||
> i -src TestDE.gf
|
|
||||||
compiling TestDE.gf... failed to find C
|
|
||||||
OCCURRED IN
|
|
||||||
atomic term C given TestCE TestCI TestCE TestDE
|
|
||||||
OCCURRED IN
|
|
||||||
renaming definition of f
|
|
||||||
OCCURRED IN
|
|
||||||
renaming module TestDE
|
|
||||||
|
|
||||||
- the other modules:
|
|
||||||
|
|
||||||
abstract TestA = {
|
|
||||||
cat A;
|
|
||||||
fun f, g : A;
|
|
||||||
}
|
|
||||||
|
|
||||||
instance TestBE of TestBI = {
|
|
||||||
oper hello = "hello";
|
|
||||||
bye = "bye";
|
|
||||||
}
|
|
||||||
|
|
||||||
interface TestBI = {
|
|
||||||
oper hello : Str;
|
|
||||||
bye : Str;
|
|
||||||
}
|
|
||||||
|
|
||||||
concrete TestCE of TestA = TestCI with (TestBI = TestBE);
|
|
||||||
|
|
||||||
incomplete concrete TestCI of TestA = open TestBI in {
|
|
||||||
lincat A = {s : Str};
|
|
||||||
lin f = {s = hello};
|
|
||||||
g = {s = bye};
|
|
||||||
}
|
|
||||||
|
|
||||||
concrete TestDE of TestA = TestDI with (TestCI = TestCE);
|
|
||||||
|
|
||||||
* [peb] IMPORTANT: Missing things in the help command
|
|
||||||
|
|
||||||
> h -printer
|
|
||||||
(the flag -printer=cfgm is missing)
|
|
||||||
|
|
||||||
> h -cat
|
|
||||||
WARNING: invalid option: cat
|
|
||||||
|
|
||||||
> h -lang
|
|
||||||
WARNING: invalid option: lang
|
|
||||||
|
|
||||||
> h -language
|
|
||||||
WARNING: invalid option: language
|
|
||||||
|
|
||||||
> h -parser
|
|
||||||
WARNING: invalid option: parser
|
|
||||||
|
|
||||||
> h -aslkdjaslkdjss
|
|
||||||
WARNING: invalid option: aslkdjaslkdjss
|
|
||||||
Command not found.
|
|
||||||
(it should note: "option not found")
|
|
||||||
|
|
||||||
> h -optimize
|
|
||||||
WARNING: invalid option: optimize
|
|
||||||
|
|
||||||
> h -startcat
|
|
||||||
WARNING: invalid option: startcat
|
|
||||||
|
|
||||||
> h h
|
|
||||||
h, help: h Command?
|
|
||||||
(it should also mention "h -option")
|
|
||||||
|
|
||||||
|
|
||||||
* [peb] IMPORTANT: Set GF_LIB_PATH within GF
|
|
||||||
|
|
||||||
> sf libpath=~/GF/lib
|
|
||||||
|
|
||||||
|
|
||||||
* [peb] IMPORTANT: Set the starting category with "sf"
|
|
||||||
|
|
||||||
> sf startcat=X
|
|
||||||
|
|
||||||
|
|
||||||
* [peb] IMPORTANT: import-flags
|
|
||||||
|
|
||||||
- There are some inconsistencies when importing grammars:
|
|
||||||
|
|
||||||
1. when doing "pg -printer=cfg", one must have used "i -conversion=finite",
|
|
||||||
since "pg" doesn't care about the flags that are set in the grammar file
|
|
||||||
|
|
||||||
2. when doing "pm -printer=cfgm", one must have set the flag
|
|
||||||
"conversion=finite" within the grammar file, since "pm" doesn't
|
|
||||||
care about the flags to the import command
|
|
||||||
|
|
||||||
(I guess it's me (peb) who should fix this, but I don't know where
|
|
||||||
the different flags reside...)
|
|
||||||
|
|
||||||
- Also, it must be decided in what cases flags can override other flags:
|
|
||||||
|
|
||||||
a) in the grammar file, e.g. "flags conversion=finite;"
|
|
||||||
b) on the command line, e.g. "> sf conversion=finite"
|
|
||||||
c) as argument to a command, e.g. "> i -conversion=finite file.gf"
|
|
||||||
|
|
||||||
- A related issue is to decide the scope of flags:
|
|
||||||
|
|
||||||
Some flags are (or should be) local to the module
|
|
||||||
(e.g. -coding and -path)
|
|
||||||
Other flags override daughter flags for daughter modules
|
|
||||||
(e.g. -startcat and -conversion)
|
|
||||||
|
|
||||||
* [bringert] IMPORTANT: get right startcat flag when printing CFGM
|
|
||||||
GF.CFGM.PrintCFGrammar.prCanonAsCFGM currently only gets the startcat
|
|
||||||
flag from the top-level concrete module. This might be easier
|
|
||||||
to fix if the multi grammar printers had access to more than just
|
|
||||||
the CanonGrammar.
|
|
||||||
|
|
||||||
* [peb] WISH: generalizing incomplete concrete
|
|
||||||
|
|
||||||
I want to be able to open an incomplete concrete module
|
|
||||||
inside another incomplete concrete.
|
|
||||||
Then I can instantiate both incompletes at the same time.
|
|
||||||
|
|
||||||
* [peb] WISH: _tmpi, _tmpo
|
|
||||||
|
|
||||||
The files _tmpi and _tmpo are never removed when quitting GF.
|
|
||||||
Further suggestion: put them in /tmp or similar.
|
|
||||||
|
|
||||||
peb: när man använder "|" till ett systemanrop, t.ex:
|
|
||||||
pg | ! sort
|
|
||||||
så skapas filerna _tmpi och _tmpo. Men de tas aldrig bort.
|
|
||||||
|
|
||||||
peb: Ännu bättre: ta bort filerna efteråt.
|
|
||||||
|
|
||||||
aarne: Sant: när GF quittas (om detta inte sker onormalt).
|
|
||||||
Eller när kommandot har kört färdigt (om det terminerar).
|
|
||||||
|
|
||||||
peb: Bäst(?): skapa filerna i /tmp eller liknande.
|
|
||||||
|
|
||||||
aarne: Ibland får man skrivrättighetsproblem - och det är
|
|
||||||
inte kul om man måste ange en tmp-path. Och olika
|
|
||||||
användare och gf-processer måste ha unika filnamn.
|
|
||||||
Och vet inte hur det funkar på windows...
|
|
||||||
|
|
||||||
aarne: Ett till alternativ skulle vara att använda handles
|
|
||||||
utan några tmp-filer alls. Men jag har inte hunnit
|
|
||||||
ta reda på hur det går till.
|
|
||||||
|
|
||||||
björn: Lite slumpmässiga tankar:
|
|
||||||
+ man kan använda System.Directory.getTemporaryDirectory, så slipper man iaf bry sig om olika plattformsproblem.
|
|
||||||
+ sen kan man använda System.IO.openTempFile för att skapa en temporär fil. Den tas dock inte bort när programmet avslutas, så det får man fixa själv.
|
|
||||||
+ System.Posix.Temp.mkstemp gör nåt liknande, men dokumentationen är dålig.
|
|
||||||
+ biblioteket HsShellScript har lite funktioner för sånt här, se
|
|
||||||
http://www.volker-wysk.de/hsshellscript/apidoc/HsShellScript.html#16
|
|
||||||
|
|
||||||
|
|
||||||
* [peb] WISH: Hierarchic modules
|
|
||||||
|
|
||||||
Suggestion by peb:
|
|
||||||
The module A.B.C is located in the file A/B/C.gf
|
|
||||||
|
|
||||||
Main advantage: you no longer need to state "--# -path=..." in
|
|
||||||
modules
|
|
||||||
|
|
||||||
- How can this be combined with several modules inside one file?
|
|
||||||
@@ -1,750 +0,0 @@
|
|||||||
Compiling GF
|
|
||||||
Aarne Ranta
|
|
||||||
Proglog meeting, 1 November 2006
|
|
||||||
|
|
||||||
% to compile: txt2tags -thtml compiling-gf.txt ; htmls compiling-gf.html
|
|
||||||
|
|
||||||
%!target:html
|
|
||||||
%!postproc(html): #NEW <!-- NEW -->
|
|
||||||
|
|
||||||
#NEW
|
|
||||||
|
|
||||||
==The compilation task==
|
|
||||||
|
|
||||||
GF is a grammar formalism, i.e. a special purpose programming language
|
|
||||||
for writing grammars.
|
|
||||||
|
|
||||||
Other grammar formalisms:
|
|
||||||
- BNF, YACC, Happy (grammars for programming languages);
|
|
||||||
- PATR, HPSG, LFG (grammars for natural languages).
|
|
||||||
|
|
||||||
|
|
||||||
The grammar compiler prepares a GF grammar for two computational tasks:
|
|
||||||
- linearization: take syntax trees to strings
|
|
||||||
- parsing: take strings to syntax trees
|
|
||||||
|
|
||||||
|
|
||||||
The grammar gives a declarative description of these functionalities,
|
|
||||||
on a high abstraction level that improves grammar writing
|
|
||||||
productivity.
|
|
||||||
|
|
||||||
For efficiency, the grammar is compiled to lower-level formats.
|
|
||||||
|
|
||||||
Type checking is another essential compilation phase. Its purpose is
|
|
||||||
twofold, as usual:
|
|
||||||
- checking the correctness of the grammar
|
|
||||||
- type-annotating expressions for code generation
|
|
||||||
|
|
||||||
|
|
||||||
#NEW
|
|
||||||
|
|
||||||
==Characteristics of GF language==
|
|
||||||
|
|
||||||
Functional language with types, both built-in and user-defined.
|
|
||||||
```
|
|
||||||
Str : Type
|
|
||||||
|
|
||||||
param Number = Sg | Pl
|
|
||||||
|
|
||||||
param AdjForm = ASg Gender | APl
|
|
||||||
|
|
||||||
Noun : Type = {s : Number => Str ; g : Gender}
|
|
||||||
```
|
|
||||||
Pattern matching.
|
|
||||||
```
|
|
||||||
svart_A = table {
|
|
||||||
ASg _ => "svart" ;
|
|
||||||
_ => "svarta"
|
|
||||||
}
|
|
||||||
```
|
|
||||||
Higher-order functions.
|
|
||||||
|
|
||||||
Dependent types.
|
|
||||||
```
|
|
||||||
flip : (a, b, c : Type) -> (a -> b -> c) -> b -> a -> c =
|
|
||||||
\_,_,_,f,y,x -> f x y ;
|
|
||||||
```
|
|
||||||
|
|
||||||
|
|
||||||
#NEW
|
|
||||||
|
|
||||||
==The module system of GF==
|
|
||||||
|
|
||||||
Main division: abstract syntax and concrete syntax
|
|
||||||
```
|
|
||||||
abstract Greeting = {
|
|
||||||
cat Greet ;
|
|
||||||
fun Hello : Greet ;
|
|
||||||
}
|
|
||||||
|
|
||||||
concrete GreetingEng of Greeting = {
|
|
||||||
lincat Greet = {s : Str} ;
|
|
||||||
lin Hello = {s = "hello"} ;
|
|
||||||
}
|
|
||||||
|
|
||||||
concrete GreetingIta of Greeting = {
|
|
||||||
param Politeness = Familiar | Polite ;
|
|
||||||
lincat Greet = {s : Politeness => Str} ;
|
|
||||||
lin Hello = {s = table {
|
|
||||||
Familiar => "ciao" ;
|
|
||||||
Polite => "buongiorno"
|
|
||||||
} ;
|
|
||||||
}
|
|
||||||
```
|
|
||||||
Other features of the module system:
|
|
||||||
- extension and opening
|
|
||||||
- parametrized modules (cf. ML: signatures, structures, functors)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#NEW
|
|
||||||
|
|
||||||
==GF vs. Haskell==
|
|
||||||
|
|
||||||
Some things that (standard) Haskell hasn't:
|
|
||||||
- records and record subtyping
|
|
||||||
- regular expression patterns
|
|
||||||
- dependent types
|
|
||||||
- ML-style modules
|
|
||||||
|
|
||||||
|
|
||||||
Some things that GF hasn't:
|
|
||||||
- infinite (recursive) data types
|
|
||||||
- recursive functions
|
|
||||||
- classes, polymorphism
|
|
||||||
|
|
||||||
|
|
||||||
#NEW
|
|
||||||
|
|
||||||
==GF vs. most linguistic grammar formalisms==
|
|
||||||
|
|
||||||
GF separates abstract syntax from concrete syntax.
|
|
||||||
|
|
||||||
GF has a module system with separate compilation.
|
|
||||||
|
|
||||||
GF is generation-oriented (as opposed to parsing).
|
|
||||||
|
|
||||||
GF has unidirectional matching (as opposed to unification).
|
|
||||||
|
|
||||||
GF has a static type system (as opposed to a type-free universe).
|
|
||||||
|
|
||||||
"I was - and I still am - firmly convinced that a program composed
|
|
||||||
out of statically type-checked parts is more likely to faithfully
|
|
||||||
express a well-thought-out design than a program relying on
|
|
||||||
weakly-typed interfaces or dynamically-checked interfaces."
|
|
||||||
(B. Stroustrup, 1994, p. 107)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#NEW
|
|
||||||
|
|
||||||
==The computation model: abstract syntax==
|
|
||||||
|
|
||||||
An abstract syntax defines a free algebra of trees (using
|
|
||||||
dependent types, recursion, higher-order abstract syntax:
|
|
||||||
GF includes a complete Logical Framework).
|
|
||||||
```
|
|
||||||
cat C (x_1 : A_1)...(x_n : A_n)
|
|
||||||
a_1 : A_1
|
|
||||||
...
|
|
||||||
a_n : A_n{x_1 : A_1,...,x_n-1 : A_n-1}
|
|
||||||
----------------------------------------------------
|
|
||||||
(C a_1 ... a_n) : Type
|
|
||||||
|
|
||||||
|
|
||||||
fun f : (x_1 : A_1) -> ... -> (x_n : A_n) -> A
|
|
||||||
a_1 : A_1
|
|
||||||
...
|
|
||||||
a_n : A_n{x_1 : A_1,...,x_n-1 : A_n-1}
|
|
||||||
----------------------------------------------------
|
|
||||||
(f a_1 ... a_n) : A{x_1 : A_1,...,x_n : A_n}
|
|
||||||
|
|
||||||
|
|
||||||
A : Type x : A |- B : Type x : A |- b : B f : (x : A) -> B a : A
|
|
||||||
---------------------------- ---------------------- ------------------------
|
|
||||||
(x : A) -> B : Type \x -> b : (x : A) -> B f a : B{x := a}
|
|
||||||
```
|
|
||||||
Notice that all syntax trees are in eta-long form.
|
|
||||||
|
|
||||||
|
|
||||||
#NEW
|
|
||||||
|
|
||||||
==The computation model: concrete syntax==
|
|
||||||
|
|
||||||
A concrete syntax defines a homomorphism (compositional mapping)
|
|
||||||
from the abstract syntax to a system of concrete syntax objects.
|
|
||||||
```
|
|
||||||
cat C _
|
|
||||||
--------------------
|
|
||||||
lincat C = C* : Type
|
|
||||||
|
|
||||||
fun f : (x_1 : A_1) -> ... -> (x_n : A_n) -> A
|
|
||||||
-----------------------------------------------
|
|
||||||
lin f = f* : A_1* -> ... -> A_n* -> A*
|
|
||||||
|
|
||||||
(f a_1 ... a_n)* = f* a_1* ... a_n*
|
|
||||||
```
|
|
||||||
The homomorphism can as such be used as a linearization function.
|
|
||||||
|
|
||||||
It is a functional program, but a restricted one, since it works
|
|
||||||
in the end on finite data structures only.
|
|
||||||
|
|
||||||
But a more efficient program is obtained via compilation to
|
|
||||||
GFC = Canonical GF: the "machine code" of GF.
|
|
||||||
|
|
||||||
The parsing problem of GFC can be reduced to that of MPCFG (Multiple
|
|
||||||
Parallel Context Free Grammars), see P. Ljunglöf's thesis (2004).
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#NEW
|
|
||||||
|
|
||||||
==The core type system of concrete syntax: basic types==
|
|
||||||
|
|
||||||
```
|
|
||||||
param P P : PType
|
|
||||||
PType : Type --------- ---------
|
|
||||||
P : PType P : Type
|
|
||||||
|
|
||||||
s : Str t : Str
|
|
||||||
Str : Type "foo" : Str [] : Str ----------------
|
|
||||||
s ++ t : Str
|
|
||||||
```
|
|
||||||
|
|
||||||
|
|
||||||
#NEW
|
|
||||||
|
|
||||||
==The core type system of concrete syntax: functions and tables==
|
|
||||||
|
|
||||||
```
|
|
||||||
A : Type x : A |- B : Type x : A |- b : B f : (x : A) -> B a : A
|
|
||||||
---------------------------- ---------------------- ------------------------
|
|
||||||
(x : A) -> B : Type \x -> b : (x : A) -> B f a : B{x := a}
|
|
||||||
|
|
||||||
|
|
||||||
P : PType A : Type t : P => A p : P
|
|
||||||
-------------------- -----------------
|
|
||||||
P => A : Type t ! p : A
|
|
||||||
|
|
||||||
v_1,...,v_n : A
|
|
||||||
---------------------------------------------- P = {C_1,...,C_n}
|
|
||||||
table {C_1 => v_1 ; ... ; C_n => v_n} : P => A
|
|
||||||
```
|
|
||||||
Pattern matching is treated as an abbreviation for tables. Notice that
|
|
||||||
```
|
|
||||||
case e of {...} == table {...} ! e
|
|
||||||
```
|
|
||||||
|
|
||||||
|
|
||||||
#NEW
|
|
||||||
|
|
||||||
==The core type system of concrete syntax: records==
|
|
||||||
|
|
||||||
```
|
|
||||||
A_1,...,A_n : Type
|
|
||||||
------------------------------------ n >= 0
|
|
||||||
{r_1 : A_1 ; ... ; r_n : A_n} : Type
|
|
||||||
|
|
||||||
|
|
||||||
a_1 : A_1 ... a_n : A_n
|
|
||||||
------------------------------------------------------------
|
|
||||||
{r_1 = a_1 ; ... ; r_n = a_n} : {r_1 : A_1 ; ... ; r_n : A_n}
|
|
||||||
|
|
||||||
|
|
||||||
r : {r_1 : A_1 ; ... ; r_n : A_n}
|
|
||||||
----------------------------------- i = 1,...,n
|
|
||||||
r.r_i : A_i
|
|
||||||
```
|
|
||||||
Subtyping: if ``r : R ** {r : A}`` then ``r : R``
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#NEW
|
|
||||||
|
|
||||||
==Computation rules==
|
|
||||||
|
|
||||||
```
|
|
||||||
(\x -> b) a = b{x := a}
|
|
||||||
|
|
||||||
(table {C_1 => v_1 ; ... ; C_n => v_n} : P => A) ! C_i = v_i
|
|
||||||
|
|
||||||
{r_1 = a_1 ; ... ; r_n = a_n}.r_i = a_i
|
|
||||||
```
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#NEW
|
|
||||||
|
|
||||||
==Canonical GF==
|
|
||||||
|
|
||||||
Concrete syntax type system:
|
|
||||||
```
|
|
||||||
A_1 : Type ... A_n : Type
|
|
||||||
Str : Type Int : Type ------------------------- $i : A
|
|
||||||
[A_1, ..., A_n] : Type
|
|
||||||
|
|
||||||
|
|
||||||
a_1 : A_1 ... a_n : A_n t : [A_1, ..., A_n]
|
|
||||||
--------------------------------- ------------------- i = 1,..,n
|
|
||||||
[a_1, ..., a_n] : [A_1, ..., A_n] t ! i : A_i
|
|
||||||
```
|
|
||||||
Tuples represent both records and tables.
|
|
||||||
|
|
||||||
There are no functions.
|
|
||||||
|
|
||||||
Linearization:
|
|
||||||
```
|
|
||||||
lin f = f*
|
|
||||||
|
|
||||||
(f a_1 ... a_n)* = f*{$1 = a_1*, ..., $n = a_n*}
|
|
||||||
```
|
|
||||||
|
|
||||||
|
|
||||||
#NEW
|
|
||||||
|
|
||||||
==The compilation task, again==
|
|
||||||
|
|
||||||
1. From a GF source grammar, derive a canonical GF grammar.
|
|
||||||
|
|
||||||
2. From the canonical GF grammar derive an MPCFG grammar
|
|
||||||
|
|
||||||
The canonical GF grammar can be used for linearization, with
|
|
||||||
linear time complexity (w.r.t. the size of the tree).
|
|
||||||
|
|
||||||
The MPCFG grammar can be used for parsing, with (unbounded)
|
|
||||||
polynomial time complexity (w.r.t. the size of the string).
|
|
||||||
|
|
||||||
For these target formats, we have also built interpreters in
|
|
||||||
different programming languages (C, C++, Haskell, Java, Prolog).
|
|
||||||
|
|
||||||
Moreover, we generate supplementary formats such as grammars
|
|
||||||
required by various speech recognition systems.
|
|
||||||
|
|
||||||
|
|
||||||
#NEW
|
|
||||||
|
|
||||||
==An overview of compilation phases==
|
|
||||||
|
|
||||||
Legend:
|
|
||||||
- ellipse node: representation saved in a file
|
|
||||||
- plain text node: internal representation
|
|
||||||
- solid arrow or ellipse: essential phase or format
|
|
||||||
- dashed arrow or ellipse: optional phase or format
|
|
||||||
- arrow label: the module implementing the phase
|
|
||||||
|
|
||||||
|
|
||||||
[gf-compiler.png]
|
|
||||||
|
|
||||||
|
|
||||||
#NEW
|
|
||||||
|
|
||||||
==Using the compiler==
|
|
||||||
|
|
||||||
Batch mode (cf. GHC).
|
|
||||||
|
|
||||||
Interactive mode, building the grammar incrementally from
|
|
||||||
different files, with the possibility of testing them
|
|
||||||
(cf. GHCI).
|
|
||||||
|
|
||||||
The interactive mode was first, built on the model of ALF-2
|
|
||||||
(L. Magnusson), and there was no file output of compiled
|
|
||||||
grammars.
|
|
||||||
|
|
||||||
|
|
||||||
#NEW
|
|
||||||
|
|
||||||
==Modules and separate compilation==
|
|
||||||
|
|
||||||
The above diagram shows what happens to each module.
|
|
||||||
(But not quite, since some of the back-end formats must be
|
|
||||||
built for sets of modules: GFCC and the parser formats.)
|
|
||||||
|
|
||||||
When the grammar compiler is called, it has a main module as its
|
|
||||||
argument. It then builds recursively a dependency graph with all
|
|
||||||
the other modules, and decides which ones must be recompiled.
|
|
||||||
The behaviour is rather similar to GHC.
|
|
||||||
|
|
||||||
Separate compilation is //extremely important// when developing
|
|
||||||
big grammars, especially when using grammar libraries. Example: compiling
|
|
||||||
the GF resource grammar library takes 5 minutes, whereas reading
|
|
||||||
in the compiled image takes 10 seconds.
|
|
||||||
|
|
||||||
|
|
||||||
#NEW
|
|
||||||
|
|
||||||
==Module dependencies and recompilation==
|
|
||||||
|
|
||||||
(For later use, not for the Proglog talk)
|
|
||||||
|
|
||||||
For each module M, there are 3 kinds of files:
|
|
||||||
- M.gf, source file
|
|
||||||
- M.gfc, compiled file ("object file")
|
|
||||||
- M.gfr, type-checked and optimized source file (for resource modules only)
|
|
||||||
|
|
||||||
|
|
||||||
The compiler reads gf files and writes gfc files (and gfr files if appropriate)
|
|
||||||
|
|
||||||
The Main module is the one used as argument when calling GF.
|
|
||||||
|
|
||||||
A module M (immediately) depends on the module K, if either
|
|
||||||
- M is a concrete of K
|
|
||||||
- M is an instance of K
|
|
||||||
- M extends K
|
|
||||||
- M opens K
|
|
||||||
- M is a completion of K with something
|
|
||||||
- M is a completion of some module with K instantiated with something
|
|
||||||
|
|
||||||
|
|
||||||
A module M (transitively) depends on the module K, if either
|
|
||||||
- M immediately depends on K
|
|
||||||
- M depends on some L such that L immediately depends on K
|
|
||||||
|
|
||||||
|
|
||||||
Immediate dependence is readable from the module header without parsing
|
|
||||||
the whole module.
|
|
||||||
|
|
||||||
The compiler reads recursively the headers of all modules that Main depends on.
|
|
||||||
|
|
||||||
These modules are arranged in a dependency graph, which is checked to be acyclic.
|
|
||||||
|
|
||||||
To decide whether a module M has to be compiled, do:
|
|
||||||
+ Get the time stamps t() of M.gf and M.gfc (if a file doesn't exist, its
|
|
||||||
time is minus infinity).
|
|
||||||
+ If t(M.gf) > t(M.gfc), M must be compiled.
|
|
||||||
+ If M depends on K and K must be compiled, then M must be compiled.
|
|
||||||
+ If M depends on K and t(K.gf) > t(M.gfc), then M must be compiled.
|
|
||||||
|
|
||||||
|
|
||||||
Decorate the dependency graph by information on whether the gf or the gfc (and gfr)
|
|
||||||
format is to be read.
|
|
||||||
|
|
||||||
Topologically sort the decorated graph, and read each file in the chosen format.
|
|
||||||
|
|
||||||
The gfr file is generated for these module types only:
|
|
||||||
- resource
|
|
||||||
- instance
|
|
||||||
|
|
||||||
|
|
||||||
When reading K.gfc, also K.gfr is read if some M depending on K has to be compiled.
|
|
||||||
In other cases, it is enough to read K.gfc.
|
|
||||||
|
|
||||||
In an interactive GF session, some modules may be in memory already.
|
|
||||||
When read into memory, each module M is given time stamp t(M.m).
|
|
||||||
The additional rule now is:
|
|
||||||
- If M.gfc is to be read, and t(M.m) > t(M.gfc), don't read M.gfc.
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#NEW
|
|
||||||
|
|
||||||
==Techniques used==
|
|
||||||
|
|
||||||
The compiler is written in Haskell, with some C foreign function calls
|
|
||||||
in the interactive version (readline, killing threads).
|
|
||||||
|
|
||||||
BNFC is used for generating both the parsers and printers.
|
|
||||||
This has helped to make the formats portable.
|
|
||||||
|
|
||||||
"Almost compositional functions" (``composOp``) are used in
|
|
||||||
many compiler passes, making them easier to write and understand.
|
|
||||||
A ``grep`` on the sources reveals 40 uses (outside the definition
|
|
||||||
of ``composOp`` itself).
|
|
||||||
|
|
||||||
The key algorithmic ideas are
|
|
||||||
- type-driven partial evaluation in GF-to-GFC generation
|
|
||||||
- common subexpression elimination as back-end optimization
|
|
||||||
- some ideas in GFC-to-MCFG encoding
|
|
||||||
|
|
||||||
|
|
||||||
#NEW
|
|
||||||
|
|
||||||
==Type-driven partial evaluation==
|
|
||||||
|
|
||||||
Each abstract syntax category in GF has a corresponding linearization type:
|
|
||||||
```
|
|
||||||
cat C
|
|
||||||
lincat C = T
|
|
||||||
```
|
|
||||||
The general form of a GF rule pair is
|
|
||||||
```
|
|
||||||
fun f : C1 -> ... -> Cn -> C
|
|
||||||
lin f = t
|
|
||||||
```
|
|
||||||
with the typing condition following the ``lincat`` definitions
|
|
||||||
```
|
|
||||||
t : T1 -> ... -> Tn -> T
|
|
||||||
```
|
|
||||||
The term ``t`` is in general built by using abstraction methods such
|
|
||||||
as pattern matching, higher-order functions, local definitions,
|
|
||||||
and library functions.
|
|
||||||
|
|
||||||
The compilation technique proceeds as follows:
|
|
||||||
- use eta-expansion on ``t`` to determine the canonical form of the term
|
|
||||||
```
|
|
||||||
\ $C1, ...., $Cn -> (t $C1 .... $Cn)
|
|
||||||
```
|
|
||||||
with unique variables ``$C1 .... $Cn`` for the arguments; repeat this
|
|
||||||
inside the term for records and tables
|
|
||||||
- evaluate the resulting term using the computation rules of GF
|
|
||||||
- what remains is a canonical term with ``$C1 .... $Cn`` the only
|
|
||||||
variables (the run-time input of the linearization function)
|
|
||||||
|
|
||||||
|
|
||||||
#NEW
|
|
||||||
|
|
||||||
==Eta-expanding records and tables==
|
|
||||||
|
|
||||||
For records that are valid via subtyping, eta expansion
|
|
||||||
eliminates superfluous fields:
|
|
||||||
```
|
|
||||||
{r1 = t1 ; r2 = t2} : {r1 : T1} ----> {r1 = t1}
|
|
||||||
```
|
|
||||||
For tables, the effect is always expansion, since
|
|
||||||
pattern matching can be used to represent tables
|
|
||||||
compactly:
|
|
||||||
```
|
|
||||||
table {n => "fish"} : Number => Str --->
|
|
||||||
|
|
||||||
table {
|
|
||||||
Sg => "fish" ;
|
|
||||||
Pl => "fish"
|
|
||||||
}
|
|
||||||
```
|
|
||||||
This can be helped by back-end optimizations (see below).
|
|
||||||
|
|
||||||
|
|
||||||
#NEW
|
|
||||||
|
|
||||||
==Eliminating functions==
|
|
||||||
|
|
||||||
"Everything is finite": parameter types, records, tables;
|
|
||||||
finite number of string tokens per grammar.
|
|
||||||
|
|
||||||
But "infinite types" such as function types are useful when
|
|
||||||
writing grammars, to enable abstractions.
|
|
||||||
|
|
||||||
Since function types do not appear in linearization types,
|
|
||||||
we want functions to be eliminated from linearization terms.
|
|
||||||
|
|
||||||
This is similar to the **subformula property** in logic.
|
|
||||||
Also the main problem is similar: function depending on
|
|
||||||
a run-time variable,
|
|
||||||
```
|
|
||||||
(table {P => f ; Q => g} ! x) a
|
|
||||||
```
|
|
||||||
This is not a redex, but we can make it closer to one by moving
|
|
||||||
the application inside the table,
|
|
||||||
```
|
|
||||||
table {P => f a ; Q => g a} ! x
|
|
||||||
```
|
|
||||||
This transformation is the same as Prawitz's (1965) elimination
|
|
||||||
of maximal segments in natural deduction:
|
|
||||||
```
|
|
||||||
A B
|
|
||||||
C -> D C C -> D C
|
|
||||||
A B --------- ---------
|
|
||||||
A v B C -> D C -> D A v B D D
|
|
||||||
--------------------- ===> -------------------------
|
|
||||||
C -> D C D
|
|
||||||
--------------------
|
|
||||||
D
|
|
||||||
```
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#NEW
|
|
||||||
|
|
||||||
==Size effects of partial evaluation==
|
|
||||||
|
|
||||||
Irrelevant table branches are thrown away, which can reduce the size.
|
|
||||||
|
|
||||||
But, since tables are expanded and auxiliary functions are inlined,
|
|
||||||
the size can grow exponentially.
|
|
||||||
|
|
||||||
How can we keep the first property and eliminate the second?
|
|
||||||
|
|
||||||
|
|
||||||
#NEW
|
|
||||||
|
|
||||||
==Parametrization of tables==
|
|
||||||
|
|
||||||
Algorithm: for each branch in a table, consider replacing the
|
|
||||||
argument by a variable:
|
|
||||||
```
|
|
||||||
table { table {
|
|
||||||
P => t ; ---> x => t[P->x] ;
|
|
||||||
Q => u x => u[Q->x]
|
|
||||||
} }
|
|
||||||
```
|
|
||||||
If the resulting branches are all equal, you can replace the table
|
|
||||||
by a lambda abstract
|
|
||||||
```
|
|
||||||
\\x => t[P->x]
|
|
||||||
```
|
|
||||||
If each created variable ``x`` is unique in the grammar, computation
|
|
||||||
with the lambda abstract is efficient.
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#NEW
|
|
||||||
|
|
||||||
==Course-of-values tables==
|
|
||||||
|
|
||||||
By maintaining a canonical order of parameters in a type, we can
|
|
||||||
eliminate the left hand sides of branches.
|
|
||||||
```
|
|
||||||
table { table T [
|
|
||||||
P => t ; ---> t ;
|
|
||||||
Q => u u
|
|
||||||
} ]
|
|
||||||
```
|
|
||||||
The treatment is similar to ``Enum`` instances in Haskell.
|
|
||||||
|
|
||||||
In the end, all parameter types can be translated to
|
|
||||||
initial segments of integers.
|
|
||||||
|
|
||||||
|
|
||||||
#NEW
|
|
||||||
|
|
||||||
==Common subexpression elimination==
|
|
||||||
|
|
||||||
Algorithm:
|
|
||||||
+ Go through all terms and subterms in a module, creating
|
|
||||||
a symbol table mapping terms to the number of occurrences.
|
|
||||||
+ For each subterm appearing at least twice, create a fresh
|
|
||||||
constant defined as that subterm.
|
|
||||||
+ Go through all rules (incl. rules for the new constants),
|
|
||||||
replacing largest possible subterms with such new constants.
|
|
||||||
|
|
||||||
|
|
||||||
This algorithm, in a way, creates the strongest possible abstractions.
|
|
||||||
|
|
||||||
In general, the new constants have open terms as definitions.
|
|
||||||
But since all variables (and constants) are unique, they can
|
|
||||||
be computed by simple replacement.
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#NEW
|
|
||||||
|
|
||||||
==Size effects of optimizations==
|
|
||||||
|
|
||||||
Example: the German resource grammar
|
|
||||||
``LangGer``
|
|
||||||
|
|
||||||
|| optimization | lines | characters | size % | blow-up |
|
|
||||||
| none | 5394 | 3208435 | 100 | 25 |
|
|
||||||
| all | 5394 | 750277 | 23 | 6 |
|
|
||||||
| none_subs | 5772 | 1290866 | 40 | 10 |
|
|
||||||
| all_subs | 5644 | 414119 | 13 | 3 |
|
|
||||||
| gfcc | 3279 | 190004 | 6 | 1.5 |
|
|
||||||
| gf source | 3976 | 121939 | 4 | 1 |
|
|
||||||
|
|
||||||
|
|
||||||
Optimization "all" means parametrization + course-of-values.
|
|
||||||
|
|
||||||
The source code size is an estimate, since it includes
|
|
||||||
potentially irrelevant library modules, and comments.
|
|
||||||
|
|
||||||
The GFCC format is not reusable in separate compilation.
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#NEW
|
|
||||||
|
|
||||||
==The shared prefix optimization==
|
|
||||||
|
|
||||||
This is currently performed in GFCC only.
|
|
||||||
|
|
||||||
The idea works for languages that have a rich morphology
|
|
||||||
based on suffixes. Then we can replace a course of values
|
|
||||||
with a pair of a prefix and a suffix set:
|
|
||||||
```
|
|
||||||
["apa", "apan", "apor", "aporna"] --->
|
|
||||||
("ap" + ["a", "an", "or", "orna"])
|
|
||||||
```
|
|
||||||
The real gain comes via common subexpression elimination:
|
|
||||||
```
|
|
||||||
_34 = ["a", "an", "or", "orna"]
|
|
||||||
apa = ("ap" + _34)
|
|
||||||
blomma = ("blomm" + _34)
|
|
||||||
flicka = ("flick" + _34)
|
|
||||||
```
|
|
||||||
Notice that it now matters a lot how grammars are written.
|
|
||||||
For instance, if German verbs are treated as a one-dimensional
|
|
||||||
table,
|
|
||||||
```
|
|
||||||
["lieben", "liebe", "liebst", ...., "geliebt", "geliebter",...]
|
|
||||||
```
|
|
||||||
no shared prefix optimization is possible. A better form is
|
|
||||||
separate tables for non-"ge" and "ge" forms:
|
|
||||||
```
|
|
||||||
[["lieben", "liebe", "liebst", ....], ["geliebt", "geliebter",...]]
|
|
||||||
```
|
|
||||||
|
|
||||||
|
|
||||||
#NEW
|
|
||||||
|
|
||||||
==Reuse of grammars as libraries==
|
|
||||||
|
|
||||||
The idea of resource grammars: take care of all aspects of
|
|
||||||
surface grammaticality (inflection, agreement, word order).
|
|
||||||
|
|
||||||
Reuse in application grammar: via translations
|
|
||||||
```
|
|
||||||
cat C ---> oper C : Type = T
|
|
||||||
lincat C = T
|
|
||||||
|
|
||||||
fun f : A ---> oper f : A* = t
|
|
||||||
lin f = t
|
|
||||||
```
|
|
||||||
The user only needs to know the type signatures (abstract syntax).
|
|
||||||
|
|
||||||
However, this does not quite guarantee grammaticality, because
|
|
||||||
different categories can have the same lincat:
|
|
||||||
```
|
|
||||||
lincat Conj = {s : Str}
|
|
||||||
lincat Adv = {s : Str}
|
|
||||||
```
|
|
||||||
Thus someone may by accident use "and" as an adverb!
|
|
||||||
|
|
||||||
|
|
||||||
#NEW
|
|
||||||
|
|
||||||
==Forcing the type checker to act as a grammar checker==
|
|
||||||
|
|
||||||
We just have to make linearization types unique for each category.
|
|
||||||
|
|
||||||
The technique is reminiscent of Haskell's ``newtype`` but uses
|
|
||||||
records instead: we add **lock fields** e.g.
|
|
||||||
```
|
|
||||||
lincat Conj = {s : Str ; lock_Conj : {}}
|
|
||||||
lincat Adv = {s : Str ; lock_Adv : {}}
|
|
||||||
```
|
|
||||||
Thanks to record subtyping, the translation is simple:
|
|
||||||
```
|
|
||||||
fun f : C1 -> ... -> Cn -> C
|
|
||||||
lin f = t
|
|
||||||
|
|
||||||
--->
|
|
||||||
|
|
||||||
oper f : C1* -> ... -> Cn* -> C* =
|
|
||||||
\x1,...,xn -> (t x1 ... xn) ** {lock_C = {}}
|
|
||||||
```
|
|
||||||
|
|
||||||
#NEW
|
|
||||||
|
|
||||||
==Things to do==
|
|
||||||
|
|
||||||
Better compression of gfc file format.
|
|
||||||
|
|
||||||
Type checking of dependent-type pattern matching in abstract syntax.
|
|
||||||
|
|
||||||
Compilation-related modules that need rewriting
|
|
||||||
- ``ReadFiles``: clarify the logic of dependencies
|
|
||||||
- ``Compile``: clarify the logic of what to do with each module
|
|
||||||
- ``Compute``: make the evaluation more efficient
|
|
||||||
- ``Parsing/*``, ``OldParsing/*``, ``Conversion/*``: reduce the number
|
|
||||||
of parser formats and algorithms
|
|
||||||
@@ -1,79 +0,0 @@
|
|||||||
graph{
|
|
||||||
|
|
||||||
size = "7,7" ;
|
|
||||||
|
|
||||||
overlap = scale ;
|
|
||||||
|
|
||||||
"Abs" [label = "Abstract Syntax", style = "solid", shape = "rectangle"] ;
|
|
||||||
|
|
||||||
"1" [label = "Bulgarian", style = "solid", shape = "ellipse", color = "green"] ;
|
|
||||||
"1" -- "Abs" [style = "solid"];
|
|
||||||
|
|
||||||
"2" [label = "Czech", style = "solid", shape = "ellipse", color = "red"] ;
|
|
||||||
"2" -- "Abs" [style = "solid"];
|
|
||||||
|
|
||||||
"3" [label = "Danish", style = "solid", shape = "ellipse", color = "green"] ;
|
|
||||||
"3" -- "Abs" [style = "solid"];
|
|
||||||
|
|
||||||
"4" [label = "German", style = "solid", shape = "ellipse", color = "green"] ;
|
|
||||||
"4" -- "Abs" [style = "solid"];
|
|
||||||
|
|
||||||
"5" [label = "Estonian", style = "solid", shape = "ellipse", color = "red"] ;
|
|
||||||
"5" -- "Abs" [style = "solid"];
|
|
||||||
|
|
||||||
"6" [label = "Greek", style = "solid", shape = "ellipse", color = "red"] ;
|
|
||||||
"6" -- "Abs" [style = "solid"];
|
|
||||||
|
|
||||||
"7" [label = "English", style = "solid", shape = "ellipse", color = "green"] ;
|
|
||||||
"7" -- "Abs" [style = "solid"];
|
|
||||||
|
|
||||||
"8" [label = "Spanish", style = "solid", shape = "ellipse", color = "green"] ;
|
|
||||||
"8" -- "Abs" [style = "solid"];
|
|
||||||
|
|
||||||
"9" [label = "French", style = "solid", shape = "ellipse", color = "green"] ;
|
|
||||||
"9" -- "Abs" [style = "solid"];
|
|
||||||
|
|
||||||
"10" [label = "Italian", style = "solid", shape = "ellipse", color = "green"] ;
|
|
||||||
"10" -- "Abs" [style = "solid"];
|
|
||||||
|
|
||||||
"11" [label = "Latvian", style = "solid", shape = "ellipse", color = "red"] ;
|
|
||||||
"11" -- "Abs" [style = "solid"];
|
|
||||||
|
|
||||||
"12" [label = "Lithuanian", style = "solid", shape = "ellipse", color = "red"] ;
|
|
||||||
"Abs" -- "12" [style = "solid"];
|
|
||||||
|
|
||||||
"13" [label = "Irish", style = "solid", shape = "ellipse", color = "red"] ;
|
|
||||||
"Abs" -- "13" [style = "solid"];
|
|
||||||
|
|
||||||
"14" [label = "Hungarian", style = "solid", shape = "ellipse", color = "red"] ;
|
|
||||||
"Abs" -- "14" [style = "solid"];
|
|
||||||
|
|
||||||
"15" [label = "Maltese", style = "solid", shape = "ellipse", color = "red"] ;
|
|
||||||
"Abs" -- "15" [style = "solid"];
|
|
||||||
|
|
||||||
"16" [label = "Dutch", style = "solid", shape = "ellipse", color = "red"] ;
|
|
||||||
"Abs" -- "16" [style = "solid"];
|
|
||||||
|
|
||||||
"17" [label = "Polish", style = "solid", shape = "ellipse", color = "red"] ;
|
|
||||||
"Abs" -- "17" [style = "solid"];
|
|
||||||
|
|
||||||
"18" [label = "Portuguese", style = "solid", shape = "ellipse", color = "red"] ;
|
|
||||||
"Abs" -- "18" [style = "solid"];
|
|
||||||
|
|
||||||
"19" [label = "Slovak", style = "solid", shape = "ellipse", color = "red"] ;
|
|
||||||
"Abs" -- "19" [style = "solid"];
|
|
||||||
|
|
||||||
"20" [label = "Slovene", style = "solid", shape = "ellipse", color = "red"] ;
|
|
||||||
"Abs" -- "20" [style = "solid"];
|
|
||||||
|
|
||||||
"21" [label = "Romanian", style = "solid", shape = "ellipse", color = "red"] ;
|
|
||||||
"Abs" -- "21" [style = "solid"];
|
|
||||||
|
|
||||||
"22" [label = "Finnish", style = "solid", shape = "ellipse", color = "green"] ;
|
|
||||||
"Abs" -- "22" [style = "solid"];
|
|
||||||
|
|
||||||
"23" [label = "Swedish", style = "solid", shape = "ellipse", color = "green"] ;
|
|
||||||
"Abs" -- "23" [style = "solid"];
|
|
||||||
|
|
||||||
|
|
||||||
}
|
|
||||||
|
Before Width: | Height: | Size: 84 KiB |
|
Before Width: | Height: | Size: 22 KiB |
|
Before Width: | Height: | Size: 22 KiB |
|
Before Width: | Height: | Size: 31 KiB |
@@ -1,88 +0,0 @@
|
|||||||
digraph {
|
|
||||||
|
|
||||||
gfe [label = "file.gfe", style = "dashed", shape = "ellipse"];
|
|
||||||
gfe -> gf1 [label = " MkConcrete", style = "dashed"];
|
|
||||||
|
|
||||||
gf1 [label = "file.gf", style = "solid", shape = "ellipse"];
|
|
||||||
gf1 -> gf2 [label = " LexGF", style = "solid"];
|
|
||||||
|
|
||||||
gf2 [label = "token list", style = "solid", shape = "plaintext"];
|
|
||||||
gf2 -> gf3 [label = " ParGF", style = "solid"];
|
|
||||||
|
|
||||||
gf3 [label = "source tree", style = "solid", shape = "plaintext"];
|
|
||||||
gf3 -> gf4 [label = " SourceToGrammar", style = "solid"];
|
|
||||||
|
|
||||||
cf [label = "file.cf", style = "dashed", shape = "ellipse"];
|
|
||||||
cf -> gf4 [label = " CF.PPrCF", style = "dashed"];
|
|
||||||
|
|
||||||
ebnf [label = "file.ebnf", style = "dashed", shape = "ellipse"];
|
|
||||||
ebnf -> gf4 [label = " CF.EBNF", style = "dashed"];
|
|
||||||
|
|
||||||
|
|
||||||
gf4 [label = "GF tree", style = "solid", shape = "plaintext"];
|
|
||||||
gf4 -> gf5 [label = " Extend", style = "solid"];
|
|
||||||
|
|
||||||
gf5 [label = "inheritance-linked GF tree", style = "solid", shape = "plaintext"];
|
|
||||||
gf5 -> gf6 [label = " Rename", style = "solid"];
|
|
||||||
|
|
||||||
gf6 [label = "name-resolved GF tree", style = "solid", shape = "plaintext"];
|
|
||||||
gf6 -> gf7 [label = " CheckGrammar", style = "solid"];
|
|
||||||
|
|
||||||
gf7 [label = "type-annotated GF tree", style = "solid", shape = "plaintext"];
|
|
||||||
gf7 -> gf8 [label = " Optimize", style = "solid"];
|
|
||||||
|
|
||||||
gf8 [label = "optimized GF tree", style = "solid", shape = "plaintext"];
|
|
||||||
gf8 -> gf9 [label = " GrammarToCanon", style = "solid"];
|
|
||||||
|
|
||||||
gf9 [label = "GFC tree", style = "solid", shape = "plaintext"];
|
|
||||||
gf9 -> gfc [label = " BackOpt", style = "solid"];
|
|
||||||
|
|
||||||
gfc [label = "optimized GFC tree", style = "solid", shape = "box"];
|
|
||||||
gfc -> gf11 [label = " PrintGFC", style = "solid"];
|
|
||||||
|
|
||||||
gf11 [label = "file.gfc", style = "solid", shape = "ellipse"];
|
|
||||||
|
|
||||||
|
|
||||||
gfcc [label = "file.gfcc", style = "solid", shape = "ellipse"];
|
|
||||||
gfc -> gfcc [label = " CanonToGFCC", style = "solid"];
|
|
||||||
|
|
||||||
mcfg [label = "file.gfcm", style = "dashed", shape = "ellipse"];
|
|
||||||
gfc -> mcfg [label = " PrintGFC", style = "dashed"];
|
|
||||||
|
|
||||||
bnf [label = "file.cf", style = "dashed", shape = "ellipse"];
|
|
||||||
gfc -> bnf [label = " CF.PrLBNF", style = "dashed"];
|
|
||||||
|
|
||||||
happy [label = "file.y (Happy)", style = "dashed", shape = "ellipse"];
|
|
||||||
bnf -> happy [label = " bnfc", style = "dashed"];
|
|
||||||
|
|
||||||
bison [label = "file.y (Bison)", style = "dashed", shape = "ellipse"];
|
|
||||||
bnf -> bison [label = " bnfc", style = "dashed"];
|
|
||||||
|
|
||||||
cup [label = "parser.java (CUP)", style = "dashed", shape = "ellipse"];
|
|
||||||
bnf -> cup [label = " bnfc", style = "dashed"];
|
|
||||||
|
|
||||||
xml [label = "file.dtd (XML)", style = "dashed", shape = "ellipse"];
|
|
||||||
bnf -> xml [label = " bnfc", style = "dashed"];
|
|
||||||
|
|
||||||
cfg [label = "CFG tree", style = "solid", shape = "plaintext"];
|
|
||||||
gfc -> cfg [label = " Conversions.GFC", style = "dashed"];
|
|
||||||
|
|
||||||
cfgm [label = "file.cfgm", style = "dashed", shape = "ellipse"];
|
|
||||||
cfg -> cfgm [label = " Conversions.GFC", style = "dashed"];
|
|
||||||
|
|
||||||
srg [label = "Non-LR CFG", style = "solid", shape = "plaintext"];
|
|
||||||
cfg -> srg [label = " Speech.SRG", style = "dashed"];
|
|
||||||
|
|
||||||
gsl [label = "file.gsl", style = "dashed", shape = "ellipse"];
|
|
||||||
srg -> gsl [label = " Speech.PrGSL", style = "dashed"];
|
|
||||||
|
|
||||||
jsgf [label = "file.jsgf", style = "dashed", shape = "ellipse"];
|
|
||||||
srg -> jsgf [label = " Speech.PrJSGF", style = "dashed"];
|
|
||||||
|
|
||||||
fa [label = "DFA", style = "solid", shape = "plaintext"];
|
|
||||||
cfg -> fa [label = " Speech.CFGToFiniteState", style = "dashed"];
|
|
||||||
|
|
||||||
slf [label = "file.slf", style = "dashed", shape = "ellipse"];
|
|
||||||
fa -> slf [label = " Speech.PrSLF", style = "dashed"];
|
|
||||||
|
|
||||||
}
|
|
||||||
|
Before Width: | Height: | Size: 27 KiB |
@@ -1,350 +0,0 @@
|
|||||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
|
||||||
<HTML>
|
|
||||||
<HEAD>
|
|
||||||
<META NAME="generator" CONTENT="http://txt2tags.sf.net">
|
|
||||||
<TITLE>A Birds-Eye View of GF as a Grammar Formalism</TITLE>
|
|
||||||
</HEAD><BODY BGCOLOR="white" TEXT="black">
|
|
||||||
<P ALIGN="center"><CENTER><H1>A Birds-Eye View of GF as a Grammar Formalism</H1>
|
|
||||||
<FONT SIZE="4">
|
|
||||||
<I>Author: Aarne Ranta</I><BR>
|
|
||||||
Last update: Thu Feb 2 14:16:01 2006
|
|
||||||
</FONT></CENTER>
|
|
||||||
|
|
||||||
<P></P>
|
|
||||||
<HR NOSHADE SIZE=1>
|
|
||||||
<P></P>
|
|
||||||
<UL>
|
|
||||||
<LI><A HREF="#toc1">GF in a few words</A>
|
|
||||||
<LI><A HREF="#toc2">History of GF</A>
|
|
||||||
<LI><A HREF="#toc3">Some key ingredients of GF in other grammar formalisms</A>
|
|
||||||
<LI><A HREF="#toc4">Examples of descriptions in each formalism</A>
|
|
||||||
<LI><A HREF="#toc5">Lambda terms and records</A>
|
|
||||||
<LI><A HREF="#toc6">The structure of GF formalisms</A>
|
|
||||||
<LI><A HREF="#toc7">The expressivity of GF</A>
|
|
||||||
<LI><A HREF="#toc8">Grammars and parsing</A>
|
|
||||||
<LI><A HREF="#toc9">Grammars as software libraries</A>
|
|
||||||
<LI><A HREF="#toc10">Multilinguality</A>
|
|
||||||
<LI><A HREF="#toc11">Parametrized modules</A>
|
|
||||||
</UL>
|
|
||||||
|
|
||||||
<P></P>
|
|
||||||
<HR NOSHADE SIZE=1>
|
|
||||||
<P></P>
|
|
||||||
<P>
|
|
||||||
<IMG ALIGN="middle" SRC="Logos/gf0.png" BORDER="0" ALT="">
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
<I>Abstract. This document gives a general description of the</I>
|
|
||||||
<I>Grammatical Framework (GF), with comparisons to other grammar</I>
|
|
||||||
<I>formalisms such as CG, ACG, HPSG, and LFG.</I>
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
<!-- NEW -->
|
|
||||||
</P>
|
|
||||||
<A NAME="toc1"></A>
|
|
||||||
<H2>GF in a few words</H2>
|
|
||||||
<P>
|
|
||||||
Grammatical Framework (GF) is a grammar formalism
|
|
||||||
based on <B>constructive type theory</B>.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
GF makes a distinction between <B>abstract syntax</B> and <B>concrete syntax</B>.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
The abstract syntax part of GF is a <B>logical framework</B>, with
|
|
||||||
dependent types and higher-order functions.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
The concrete syntax is a system of <B>records</B> containing strings and features.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
A GF grammar defines a <B>reversible homomorphism</B> from an abstract syntax to a
|
|
||||||
concrete syntax.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
A <B>multilingual GF grammar</B> is a set of concrete syntaxes associated with
|
|
||||||
one abstract syntax.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
GF grammars are written in a high-level <B>functional programming language</B>,
|
|
||||||
which is compiled into a <B>core language</B> (GFC).
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
GF grammars can be used as <B>resources</B>, i.e. as libraries for writing
|
|
||||||
new grammars; these are compiled and optimized by the method of
|
|
||||||
<B>grammar composition</B>.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
GF has a <B>module system</B> that supports grammar engineering and separate
|
|
||||||
compilation.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
<!-- NEW -->
|
|
||||||
</P>
|
|
||||||
<A NAME="toc2"></A>
|
|
||||||
<H2>History of GF</H2>
|
|
||||||
<P>
|
|
||||||
1988. Intuitionistic Categorial Grammar; type theory as abstract syntax,
|
|
||||||
playing the role of Montague's analysis trees. Grammars implemented in Prolog.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
1994. Type-Theoretical Grammar. Abstract syntax organized as a system of
|
|
||||||
combinators. Grammars implemented in ALF.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
1996. Multilingual Type-Theoretical Grammar. Rules for generating six
|
|
||||||
languages from the same abstract syntax. Grammars implemented in ALF, ML, and
|
|
||||||
Haskell.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
1998. The first implementation of GF as a language of its own.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
2000. New version of GF: high-level functional source language, records used
|
|
||||||
for concrete syntax.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
2003. The module system.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
2004. Ljunglöf's thesis <I>Expressivity and Complexity of GF</I>.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
<!-- NEW -->
|
|
||||||
</P>
|
|
||||||
<A NAME="toc3"></A>
|
|
||||||
<H2>Some key ingredients of GF in other grammar formalisms</H2>
|
|
||||||
<UL>
|
|
||||||
<LI>[GF ]: Grammatical Framework
|
|
||||||
<LI>[CG ]: categorial grammar
|
|
||||||
<LI>[ACG ]: abstract categorial grammar
|
|
||||||
<LI>[HPSG ]: head-driven phrase structure grammar
|
|
||||||
<LI>[LFG ]: lexical functional grammar
|
|
||||||
</UL>
|
|
||||||
|
|
||||||
<TABLE CELLPADDING="4" BORDER="1">
|
|
||||||
<TR>
|
|
||||||
<TD ALIGN="center">/</TD>
|
|
||||||
<TD>GF</TD>
|
|
||||||
<TD>ACG</TD>
|
|
||||||
<TD>LFG</TD>
|
|
||||||
<TD>HPSG</TD>
|
|
||||||
<TD>CG</TD>
|
|
||||||
</TR>
|
|
||||||
<TR>
|
|
||||||
<TD>abstract vs concrete syntax</TD>
|
|
||||||
<TD>X</TD>
|
|
||||||
<TD>X</TD>
|
|
||||||
<TD>?</TD>
|
|
||||||
<TD>-</TD>
|
|
||||||
<TD>-</TD>
|
|
||||||
</TR>
|
|
||||||
<TR>
|
|
||||||
<TD>type theory</TD>
|
|
||||||
<TD>X</TD>
|
|
||||||
<TD>X</TD>
|
|
||||||
<TD>-</TD>
|
|
||||||
<TD>-</TD>
|
|
||||||
<TD>X</TD>
|
|
||||||
</TR>
|
|
||||||
<TR>
|
|
||||||
<TD>records and features</TD>
|
|
||||||
<TD>X</TD>
|
|
||||||
<TD>-</TD>
|
|
||||||
<TD>X</TD>
|
|
||||||
<TD>X</TD>
|
|
||||||
<TD>-</TD>
|
|
||||||
</TR>
|
|
||||||
</TABLE>
|
|
||||||
|
|
||||||
<P></P>
|
|
||||||
<P>
|
|
||||||
<!-- NEW -->
|
|
||||||
</P>
|
|
||||||
<A NAME="toc4"></A>
|
|
||||||
<H2>Examples of descriptions in each formalism</H2>
|
|
||||||
<P>
|
|
||||||
To be written...
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
<!-- NEW -->
|
|
||||||
</P>
|
|
||||||
<A NAME="toc5"></A>
|
|
||||||
<H2>Lambda terms and records</H2>
|
|
||||||
<P>
|
|
||||||
In CS, abstract syntax is trees and concrete syntax is strings.
|
|
||||||
This works more or less for programming languages.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
In CG, all syntax is lambda terms.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
In Montague grammar, abstract syntax is lambda terms and
|
|
||||||
concrete syntax is trees. Abstract syntax as lambda terms
|
|
||||||
can be considered well-established.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
In PATR and HPSG, concrete syntax is records. This can be considered
|
|
||||||
well-established for natural languages.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
In ACG, both are lambda terms. This is more general than GF,
|
|
||||||
but reversibility requires a linearity restriction, which can be
|
|
||||||
unnatural for grammar writing.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
In GF, linearization from lambda terms to records is reversible,
|
|
||||||
and grammar writing is not restricted to linear terms.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
Grammar composition in ACG is just function composition. In GF,
|
|
||||||
it is more restricted...
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
<!-- NEW -->
|
|
||||||
</P>
|
|
||||||
<A NAME="toc6"></A>
|
|
||||||
<H2>The structure of GF formalisms</H2>
|
|
||||||
<P>
|
|
||||||
The following diagram (to be drawn properly!) describes the
|
|
||||||
levels.
|
|
||||||
</P>
|
|
||||||
<PRE>
|
|
||||||
| programming language design
|
|
||||||
V
|
|
||||||
GF source language
|
|
||||||
|
|
|
||||||
| type-directed partial evaluation
|
|
||||||
V
|
|
||||||
GFC assembly language
|
|
||||||
|
|
|
||||||
| Ljunglöf's translation
|
|
||||||
V
|
|
||||||
MCFG parser
|
|
||||||
</PRE>
|
|
||||||
<P>
|
|
||||||
The last two phases are nontrivial mathematical properties.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
In most grammar formalisms, grammarians have to work on the GFC
|
|
||||||
(or MCFG) level.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
Maybe they use macros - they are therefore like macro assemblers. But there
|
|
||||||
are no separately compiled library modules, no type checking, etc.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
<!-- NEW -->
|
|
||||||
</P>
|
|
||||||
<A NAME="toc7"></A>
|
|
||||||
<H2>The expressivity of GF</H2>
|
|
||||||
<P>
|
|
||||||
Parsing complexity is the same as MCFG: polynomial, with
|
|
||||||
unrestricted exponent depending on grammar.
|
|
||||||
This is between TAG and HPSG.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
If semantic well-formedness (type theory) is taken into account,
|
|
||||||
then arbitrary logic can be expressed. The well-formedness of
|
|
||||||
abstract syntax is decidable, but the well-formedness of a
|
|
||||||
concrete-syntax string can require an arbitrary proof construction
|
|
||||||
and is therefore undecidable.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
Separability between AS and CS: like TAG (Tree Adjoining Grammar), GF
|
|
||||||
has the goal of assigning intended trees for strings. This is
|
|
||||||
generalized to shared trees for different languages.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
The high-level language strives after the properties of
|
|
||||||
writability and readability (programming language notions).
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
<!-- NEW -->
|
|
||||||
</P>
|
|
||||||
<A NAME="toc8"></A>
|
|
||||||
<H2>Grammars and parsing</H2>
|
|
||||||
<P>
|
|
||||||
In many projects, a grammar is just seen as a <B>declarative parsing program</B>.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
For GF, a grammar is primarily the <B>definition of a language</B>.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
Detaching grammars from parsers is a good idea, giving
|
|
||||||
</P>
|
|
||||||
<UL>
|
|
||||||
<LI>more efficient and robust parsing (statistical etc)
|
|
||||||
<LI>cleaner grammars
|
|
||||||
</UL>
|
|
||||||
|
|
||||||
<P>
|
|
||||||
Separating abstract from concrete syntax is a prerequisite for this:
|
|
||||||
we want parsers to return abstract syntax objects, and these must exist
|
|
||||||
independently of parse trees.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
A possible radical approach to parsing:
|
|
||||||
use a grammar to generate a treebank and machine-learn
|
|
||||||
a statistical parser from this.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
Comparison: Steedman in CCG has done something like this.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
<!-- NEW -->
|
|
||||||
</P>
|
|
||||||
<A NAME="toc9"></A>
|
|
||||||
<H2>Grammars as software libraries</H2>
|
|
||||||
<P>
|
|
||||||
Reuse for different purposes.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
Grammar composition.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
<!-- NEW -->
|
|
||||||
</P>
|
|
||||||
<A NAME="toc10"></A>
|
|
||||||
<H2>Multilinguality</H2>
|
|
||||||
<P>
|
|
||||||
In <B>application grammars</B>, the AS is a semantic
|
|
||||||
model, and a CS covers domain terminology and idioms.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
This can give publication-quality translation on
|
|
||||||
limited domains (e.g. the WebALT project).
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
Resource grammars with grammar composition lead to
|
|
||||||
<B>compile-time transfer</B>.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
When is <B>run-time transfer</B> necessary?
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
Cf. CLE (Core Language Engine).
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
<!-- NEW -->
|
|
||||||
</P>
|
|
||||||
<A NAME="toc11"></A>
|
|
||||||
<H2>Parametrized modules</H2>
|
|
||||||
<P>
|
|
||||||
This notion comes from the ML language in the 1980's.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
It can be used for sharing even more code between languages
|
|
||||||
than their AS.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
Especially, for related languages (Scandinavian, Romance).
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
Cf. grammar porting in CLE: what they do with untyped
|
|
||||||
macro packages GF does with typable interfaces.
|
|
||||||
</P>
|
|
||||||
|
|
||||||
<!-- html code generated by txt2tags 2.0 (http://txt2tags.sf.net) -->
|
|
||||||
<!-- cmdline: txt2tags -thtml -\-toc gf-formalism.txt -->
|
|
||||||
</BODY></HTML>
|
|
||||||
@@ -1,279 +0,0 @@
|
|||||||
A Birds-Eye View of GF as a Grammar Formalism
|
|
||||||
Author: Aarne Ranta
|
|
||||||
Last update: %%date(%c)
|
|
||||||
|
|
||||||
% NOTE: this is a txt2tags file.
|
|
||||||
% Create an html file from this file using:
|
|
||||||
% txt2tags -thtml --toc gf-formalism.txt
|
|
||||||
|
|
||||||
%!target:html
|
|
||||||
|
|
||||||
%!postproc(html): #NEW <!-- NEW -->
|
|
||||||
|
|
||||||
[Logos/gf0.png]
|
|
||||||
|
|
||||||
//Abstract. This document gives a general description of the//
|
|
||||||
//Grammatical Framework (GF), with comparisons to other grammar//
|
|
||||||
//formalisms such as CG, ACG, HPSG, and LFG.//
|
|
||||||
|
|
||||||
|
|
||||||
#NEW
|
|
||||||
|
|
||||||
==Logical Frameworks and Grammar Formalisms==
|
|
||||||
|
|
||||||
Logic - formalization of mathematics (mathematical language?)
|
|
||||||
|
|
||||||
Linguistics - formalization of natural language
|
|
||||||
|
|
||||||
Since math lang is a subset, we can expect similarities.
|
|
||||||
|
|
||||||
But in natural language we have
|
|
||||||
- masses of empirical data
|
|
||||||
- no right of reform
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#NEW
|
|
||||||
|
|
||||||
==High-level programming==
|
|
||||||
|
|
||||||
We have to write a lot of program code when formalizing language.
|
|
||||||
|
|
||||||
We need a language with proper abstractions.
|
|
||||||
|
|
||||||
Cf. Paul Graham on Prolog: very high-level, but wrong abstractions.
|
|
||||||
|
|
||||||
Typed functional languages work well in maths.
|
|
||||||
|
|
||||||
We have developed one for linguistics
|
|
||||||
- some extra constructs, e.g. inflection tables
|
|
||||||
- constraint of reversibility (nontrivial math problem)
|
|
||||||
|
|
||||||
|
|
||||||
Writing a grammar of e.g. French clitics should not be a topic
|
|
||||||
on which one can write a paper - it should be easy to render in code
|
|
||||||
the known facts about languages!
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#NEW
|
|
||||||
|
|
||||||
==GF in a few words==
|
|
||||||
|
|
||||||
Grammatical Framework (GF) is a grammar formalism
|
|
||||||
based on **constructive type theory**.
|
|
||||||
|
|
||||||
GF makes a distinction between **abstract syntax** and **concrete syntax**.
|
|
||||||
|
|
||||||
The abstract syntax part of GF is a **logical framework**, with
|
|
||||||
dependent types and higher-order functions.
|
|
||||||
|
|
||||||
The concrete syntax is a system of **records** containing strings and features.
|
|
||||||
|
|
||||||
A GF grammar defines a **reversible homomorphism** from an abstract syntax to a
|
|
||||||
concrete syntax.
|
|
||||||
|
|
||||||
A **multilingual GF grammar** is a set of concrete syntaxes associated with
|
|
||||||
one abstract syntax.
|
|
||||||
|
|
||||||
GF grammars are written in a high-level **functional programming language**,
|
|
||||||
which is compiled into a **core language** (GFC).
|
|
||||||
|
|
||||||
GF grammars can be used as **resources**, i.e. as libraries for writing
|
|
||||||
new grammars; these are compiled and optimized by the method of
|
|
||||||
**grammar composition**.
|
|
||||||
|
|
||||||
GF has a **module system** that supports grammar engineering and separate
|
|
||||||
compilation.
|
|
||||||
|
|
||||||
|
|
||||||
#NEW
|
|
||||||
|
|
||||||
==History of GF==
|
|
||||||
|
|
||||||
1988. Intuitionistic Categorial Grammar; type theory as abstract syntax,
|
|
||||||
playing the role of Montague's analysis trees. Grammars implemented in Prolog.
|
|
||||||
|
|
||||||
1994. Type-Theoretical Grammar. Abstract syntax organized as a system of
|
|
||||||
combinators. Grammars implemented in ALF.
|
|
||||||
|
|
||||||
1996. Multilingual Type-Theoretical Grammar. Rules for generating six
|
|
||||||
languages from the same abstract syntax. Grammars implemented in ALF, ML, and
|
|
||||||
Haskell.
|
|
||||||
|
|
||||||
1998. The first implementation of GF as a language of its own.
|
|
||||||
|
|
||||||
2000. New version of GF: high-level functional source language, records used
|
|
||||||
for concrete syntax.
|
|
||||||
|
|
||||||
2003. The module system.
|
|
||||||
|
|
||||||
2004. Ljunglöf's thesis //Expressivity and Complexity of GF//.
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#NEW
|
|
||||||
|
|
||||||
==Some key ingredients of GF in other grammar formalisms==
|
|
||||||
|
|
||||||
- [GF ]: Grammatical Framework
|
|
||||||
- [CG ]: categorial grammar
|
|
||||||
- [ACG ]: abstract categorial grammar
|
|
||||||
- [HPSG ]: head-driven phrase structure grammar
|
|
||||||
- [LFG ]: lexical functional grammar
|
|
||||||
|
|
||||||
|
|
||||||
| / | GF | ACG | LFG | HPSG | CG |
|
|
||||||
| abstract vs concrete syntax | X | X | ? | - | - |
|
|
||||||
| type theory | X | X | - | - | X |
|
|
||||||
| records and features | X | - | X | X | - |
|
|
||||||
|
|
||||||
|
|
||||||
#NEW
|
|
||||||
|
|
||||||
==Examples of descriptions in each formalism==
|
|
||||||
|
|
||||||
To be written...
|
|
||||||
|
|
||||||
|
|
||||||
#NEW
|
|
||||||
|
|
||||||
==Lambda terms and records==
|
|
||||||
|
|
||||||
In CS, abstract syntax is trees and concrete syntax is strings.
|
|
||||||
This works more or less for programming languages.
|
|
||||||
|
|
||||||
In CG, all syntax is lambda terms.
|
|
||||||
|
|
||||||
In Montague grammar, abstract syntax is lambda terms and
|
|
||||||
concrete syntax is trees. Abstract syntax as lambda terms
|
|
||||||
can be considered well-established.
|
|
||||||
|
|
||||||
In PATR and HPSG, concrete syntax is records. This can be considered
|
|
||||||
well-established for natural languages.
|
|
||||||
|
|
||||||
In ACG, both are lambda terms. This is more general than GF,
|
|
||||||
but reversibility requires a linearity restriction, which can be
|
|
||||||
unnatural for grammar writing.
|
|
||||||
|
|
||||||
In GF, linearization from lambda terms to records is reversible,
|
|
||||||
and grammar writing is not restricted to linear terms.
|
|
||||||
|
|
||||||
Grammar composition in ACG is just function composition. In GF,
|
|
||||||
it is more restricted...
|
|
||||||
|
|
||||||
|
|
||||||
#NEW
|
|
||||||
|
|
||||||
==The structure of GF formalisms==
|
|
||||||
|
|
||||||
The following diagram (to be drawn properly!) describes the
|
|
||||||
levels.
|
|
||||||
```
|
|
||||||
| programming language design
|
|
||||||
V
|
|
||||||
GF source language
|
|
||||||
|
|
|
||||||
| type-directed partial evaluation
|
|
||||||
V
|
|
||||||
GFC assembly language
|
|
||||||
|
|
|
||||||
| Ljunglöf's translation
|
|
||||||
V
|
|
||||||
MCFG parser
|
|
||||||
```
|
|
||||||
The last two phases are nontrivial mathematical properties.
|
|
||||||
|
|
||||||
In most grammar formalisms, grammarians have to work on the GFC
|
|
||||||
(or MCFG) level.
|
|
||||||
|
|
||||||
Maybe they use macros - they are therefore like macro assemblers. But there
|
|
||||||
are no separately compiled library modules, no type checking, etc.
|
|
||||||
|
|
||||||
|
|
||||||
#NEW
|
|
||||||
|
|
||||||
==The expressivity of GF==
|
|
||||||
|
|
||||||
Parsing complexity is the same as MCFG: polynomial, with
|
|
||||||
unrestricted exponent depending on grammar.
|
|
||||||
This is between TAG and HPSG.
|
|
||||||
|
|
||||||
If semantic well-formedness (type theory) is taken into account,
|
|
||||||
then arbitrary logic can be expressed. The well-formedness of
|
|
||||||
abstract syntax is decidable, but the well-formedness of a
|
|
||||||
concrete-syntax string can require an arbitrary proof construction
|
|
||||||
and is therefore undecidable.
|
|
||||||
|
|
||||||
Separability between AS and CS: like TAG (Tree Adjoining Grammar), GF
|
|
||||||
has the goal of assigning intended trees for strings. This is
|
|
||||||
generalized to shared trees for different languages.
|
|
||||||
|
|
||||||
The high-level language strives after the properties of
|
|
||||||
writability and readability (programming language notions).
|
|
||||||
|
|
||||||
|
|
||||||
#NEW
|
|
||||||
|
|
||||||
==Grammars and parsing==
|
|
||||||
|
|
||||||
In many projects, a grammar is just seen as a **declarative parsing program**.
|
|
||||||
|
|
||||||
For GF, a grammar is primarily the **definition of a language**.
|
|
||||||
|
|
||||||
Detaching grammars from parsers is a good idea, giving
|
|
||||||
- more efficient and robust parsing (statistical etc)
|
|
||||||
- cleaner grammars
|
|
||||||
|
|
||||||
|
|
||||||
Separating abstract from concrete syntax is a prerequisite for this:
|
|
||||||
we want parsers to return abstract syntax objects, and these must exist
|
|
||||||
independently of parse trees.
|
|
||||||
|
|
||||||
A possible radical approach to parsing:
|
|
||||||
use a grammar to generate a treebank and machine-learn
|
|
||||||
a statistical parser from this.
|
|
||||||
|
|
||||||
Comparison: Steedman in CCG has done something like this.
|
|
||||||
|
|
||||||
|
|
||||||
#NEW
|
|
||||||
|
|
||||||
==Grammars as software libraries==
|
|
||||||
|
|
||||||
Reuse for different purposes.
|
|
||||||
|
|
||||||
Grammar composition.
|
|
||||||
|
|
||||||
|
|
||||||
#NEW
|
|
||||||
|
|
||||||
==Multilinguality==
|
|
||||||
|
|
||||||
In **application grammars**, the AS is a semantic
|
|
||||||
model, and a CS covers domain terminology and idioms.
|
|
||||||
|
|
||||||
This can give publication-quality translation on
|
|
||||||
limited domains (e.g. the WebALT project).
|
|
||||||
|
|
||||||
Resource grammars with grammar composition lead to
|
|
||||||
**compile-time transfer**.
|
|
||||||
|
|
||||||
When is **run-time transfer** necessary?
|
|
||||||
|
|
||||||
Cf. CLE (Core Language Engine).
|
|
||||||
|
|
||||||
|
|
||||||
#NEW
|
|
||||||
|
|
||||||
==Parametrized modules==
|
|
||||||
|
|
||||||
This notion comes from the ML language in the 1980's.
|
|
||||||
|
|
||||||
It can be used for sharing even more code between languages
|
|
||||||
than their AS.
|
|
||||||
|
|
||||||
Especially, for related languages (Scandinavian, Romance).
|
|
||||||
|
|
||||||
Cf. grammar porting in CLE: what they do with untyped
|
|
||||||
macro packages GF does with typable interfaces.
|
|
||||||
@@ -1,311 +0,0 @@
|
|||||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
|
||||||
<HTML>
|
|
||||||
<HEAD>
|
|
||||||
<META NAME="generator" CONTENT="http://txt2tags.sf.net">
|
|
||||||
<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=iso-8859-1">
|
|
||||||
<TITLE>GF Project Ideas</TITLE>
|
|
||||||
</HEAD><BODY BGCOLOR="white" TEXT="black">
|
|
||||||
|
|
||||||
<P>
|
|
||||||
<center>
|
|
||||||
<IMG ALIGN="middle" SRC="Logos/gf0.png" BORDER="0" ALT="">
|
|
||||||
</center>
|
|
||||||
</P>
|
|
||||||
|
|
||||||
<P ALIGN="center"><CENTER>
|
|
||||||
<H1>GF Project Ideas</H1>
|
|
||||||
<FONT SIZE="4">
|
|
||||||
<I>Resource Grammars, Web Applications, etc</I><BR>
|
|
||||||
contact: Aarne Ranta (aarne at chalmers dot se)
|
|
||||||
</FONT></CENTER>
|
|
||||||
|
|
||||||
<P></P>
|
|
||||||
<HR NOSHADE SIZE=1>
|
|
||||||
<P></P>
|
|
||||||
<UL>
|
|
||||||
<LI><A HREF="#toc1">Resource Grammar Implementations</A>
|
|
||||||
<UL>
|
|
||||||
<LI><A HREF="#toc2">Tasks</A>
|
|
||||||
<LI><A HREF="#toc3">Who is qualified</A>
|
|
||||||
<LI><A HREF="#toc4">The Summer School</A>
|
|
||||||
</UL>
|
|
||||||
<LI><A HREF="#toc5">Other project ideas</A>
|
|
||||||
<UL>
|
|
||||||
<LI><A HREF="#toc6">GF interpreter in Java</A>
|
|
||||||
<LI><A HREF="#toc7">GF interpreter in C#</A>
|
|
||||||
<LI><A HREF="#toc8">GF localization library</A>
|
|
||||||
<LI><A HREF="#toc9">Multilingual grammar applications for mobile phones</A>
|
|
||||||
<LI><A HREF="#toc10">Multilingual grammar applications for the web</A>
|
|
||||||
<LI><A HREF="#toc11">GMail gadget for GF</A>
|
|
||||||
</UL>
|
|
||||||
<LI><A HREF="#toc12">Dissemination and intellectual property</A>
|
|
||||||
</UL>
|
|
||||||
|
|
||||||
<P></P>
|
|
||||||
<HR NOSHADE SIZE=1>
|
|
||||||
<P></P>
|
|
||||||
<A NAME="toc1"></A>
|
|
||||||
<H2>Resource Grammar Implementations</H2>
|
|
||||||
<P>
|
|
||||||
GF Resource Grammar Library is an open-source computational grammar resource
|
|
||||||
that currently covers 12 languages.
|
|
||||||
The Library is a collaborative effort to which programmers from many countries
|
|
||||||
have contributed. The next goal is to extend the library
|
|
||||||
to all of the 23 official EU languages. Also other languages
|
|
||||||
are welcome all the time. The following diagram shows the current status of the
|
|
||||||
library. Each of the red and yellow ones is a potential project.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
<center>
|
|
||||||
<IMG ALIGN="middle" SRC="school-langs.png" BORDER="0" ALT="">
|
|
||||||
</center>
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
<I>red=wanted, green=exists, orange=in-progress, solid=official-eu, dotted=non-eu</I>
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
The linguistic coverage of the library includes the inflectional morphology
|
|
||||||
and basic syntax of each language. It can be used in GF applications
|
|
||||||
and also ported to other formats. It can also be used for building other
|
|
||||||
linguistic resources, such as morphological lexica and parsers.
|
|
||||||
The library is licensed under LGPL.
|
|
||||||
</P>
|
|
||||||
<A NAME="toc2"></A>
|
|
||||||
<H3>Tasks</H3>
|
|
||||||
<P>
|
|
||||||
Writing a grammar for a language is usually easier if other languages
|
|
||||||
from the same family already have grammars. The colours have the same
|
|
||||||
meaning as in the diagram above; in addition, we use boldface for the
|
|
||||||
red, still unimplemented languages and italics for the
|
|
||||||
orange languages in progress. Thus, in particular, each of the languages
|
|
||||||
coloured red below is a possible programming project.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
Baltic:
|
|
||||||
</P>
|
|
||||||
<UL>
|
|
||||||
<LI><font color="red"><b> Latvian </b></font>
|
|
||||||
<LI><font color="red"><b> Lithuanian </b></font>
|
|
||||||
</UL>
|
|
||||||
|
|
||||||
<P>
|
|
||||||
Celtic:
|
|
||||||
</P>
|
|
||||||
<UL>
|
|
||||||
<LI><font color="red"><b> Irish </b></font>
|
|
||||||
</UL>
|
|
||||||
|
|
||||||
<P>
|
|
||||||
Fenno-Ugric:
|
|
||||||
</P>
|
|
||||||
<UL>
|
|
||||||
<LI><font color="red"><b> Estonian </b></font>
|
|
||||||
<LI><font color="green" size="-1"> Finnish </font>
|
|
||||||
<LI><font color="red"><b> Hungarian </b></font>
|
|
||||||
</UL>
|
|
||||||
|
|
||||||
<P>
|
|
||||||
Germanic:
|
|
||||||
</P>
|
|
||||||
<UL>
|
|
||||||
<LI><font color="green" size="-1"> Danish </font>
|
|
||||||
<LI><font color="red"><b> Dutch </b></font>
|
|
||||||
<LI><font color="green" size="-1"> English </font>
|
|
||||||
<LI><font color="green" size="-1"> German </font>
|
|
||||||
<LI><font color="green" size="-1"> Norwegian </font>
|
|
||||||
<LI><font color="green" size="-1"> Swedish </font>
|
|
||||||
</UL>
|
|
||||||
|
|
||||||
<P>
|
|
||||||
Hellenic:
|
|
||||||
</P>
|
|
||||||
<UL>
|
|
||||||
<LI><font color="red"><b> Greek </b></font>
|
|
||||||
</UL>
|
|
||||||
|
|
||||||
<P>
|
|
||||||
Indo-Iranian:
|
|
||||||
</P>
|
|
||||||
<UL>
|
|
||||||
<LI><font color="orange"><i> Hindi </i></font>
|
|
||||||
<LI><font color="orange"><i> Urdu </i></font>
|
|
||||||
</UL>
|
|
||||||
|
|
||||||
<P>
|
|
||||||
Romance:
|
|
||||||
</P>
|
|
||||||
<UL>
|
|
||||||
<LI><font color="green" size="-1"> Catalan </font>
|
|
||||||
<LI><font color="green" size="-1"> French </font>
|
|
||||||
<LI><font color="green" size="-1"> Italian </font>
|
|
||||||
<LI><font color="red"><b> Portuguese </b></font>
|
|
||||||
<LI><font color="orange"><i> Romanian </i></font>
|
|
||||||
<LI><font color="green" size="-1"> Spanish </font>
|
|
||||||
</UL>
|
|
||||||
|
|
||||||
<P>
|
|
||||||
Semitic:
|
|
||||||
</P>
|
|
||||||
<UL>
|
|
||||||
<LI><font color="orange"><i> Arabic </i></font>
|
|
||||||
<LI><font color="red"><b> Maltese </b></font>
|
|
||||||
</UL>
|
|
||||||
|
|
||||||
<P>
|
|
||||||
Slavonic:
|
|
||||||
</P>
|
|
||||||
<UL>
|
|
||||||
<LI><font color="green" size="-1"> Bulgarian </font>
|
|
||||||
<LI><font color="red"><b> Czech </b></font>
|
|
||||||
<LI><font color="orange"><i> Polish </i></font>
|
|
||||||
<LI><font color="green" size="-1"> Russian </font>
|
|
||||||
<LI><font color="red"><b> Slovak </b></font>
|
|
||||||
<LI><font color="red"><b> Slovenian </b></font>
|
|
||||||
</UL>
|
|
||||||
|
|
||||||
<P>
|
|
||||||
Tai:
|
|
||||||
</P>
|
|
||||||
<UL>
|
|
||||||
<LI><font color="orange"><i> Thai </i></font>
|
|
||||||
</UL>
|
|
||||||
|
|
||||||
<P>
|
|
||||||
Turkic:
|
|
||||||
</P>
|
|
||||||
<UL>
|
|
||||||
<LI><font color="orange"><i> Turkish </i></font>
|
|
||||||
</UL>
|
|
||||||
|
|
||||||
<A NAME="toc3"></A>
|
|
||||||
<H3>Who is qualified</H3>
|
|
||||||
<P>
|
|
||||||
Writing a resource grammar implementation requires good general programming
|
|
||||||
skills, and a good explicit knowledge of the grammar of the target language.
|
|
||||||
A typical participant could be
|
|
||||||
</P>
|
|
||||||
<UL>
|
|
||||||
<LI>native or fluent speaker of the target language
|
|
||||||
<LI>interested in languages on the theoretical level, and preferably familiar
|
|
||||||
with many languages (to be able to think about them on an abstract level)
|
|
||||||
<LI>familiar with functional programming languages such as ML or Haskell
|
|
||||||
(GF itself is a language similar to these)
|
|
||||||
<LI>on Master's or PhD level in linguistics, computer science, or mathematics
|
|
||||||
</UL>
|
|
||||||
|
|
||||||
<P>
|
|
||||||
But it is the quality of the assignment that is assessed, not any formal
|
|
||||||
requirements. The "typical participant" was described to give an idea of
|
|
||||||
who is likely to succeed in this.
|
|
||||||
</P>
|
|
||||||
<A NAME="toc4"></A>
|
|
||||||
<H3>The Summer School</H3>
|
|
||||||
<P>
|
|
||||||
A Summer School on resource grammars and applications will
|
|
||||||
be organized at the campus of Chalmers University of Technology in Gothenburg,
|
|
||||||
Sweden, on 17-28 August 2009. It can be seen as a natural checkpoint in
|
|
||||||
a resource grammar project; the participants are assumed to learn GF before
|
|
||||||
the Summer School, but how far they have come in their projects may vary.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
More information on the Summer School web page:
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
<A HREF="http://www.cs.chalmers.se/Cs/Research/Language-technology/GF/doc/gf-summerschool.html"><CODE>http://www.cs.chalmers.se/Cs/Research/Language-technology/GF/doc/gf-summerschool.html</CODE></A>
|
|
||||||
</P>
|
|
||||||
<A NAME="toc5"></A>
|
|
||||||
<H2>Other project ideas</H2>
|
|
||||||
<A NAME="toc6"></A>
|
|
||||||
<H3>GF interpreter in Java</H3>
|
|
||||||
<P>
|
|
||||||
The idea is to write a run-time system for GF grammars in Java. This enables
|
|
||||||
the use of <B>embedded grammars</B> in Java applications. This project is
|
|
||||||
a refresh of <A HREF="http://www.cs.chalmers.se/~bringert/gf/gf-java.html">earlier work</A>,
|
|
||||||
now using the new run-time format PGF and addressing a new parsing algorithm.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
Requirements: Java, Haskell, basics of compilers and parsing algorithms.
|
|
||||||
</P>
|
|
||||||
<A NAME="toc7"></A>
|
|
||||||
<H3>GF interpreter in C#</H3>
|
|
||||||
<P>
|
|
||||||
The idea is to write a run-time system for GF grammars in C#. This enables
|
|
||||||
the use of <B>embedded grammars</B> in C# applications. This project is
|
|
||||||
similar to <A HREF="http://www.cs.chalmers.se/~bringert/gf/gf-java.html">earlier work</A>
|
|
||||||
on Java, now addressing C# and using the new run-time format PGF.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
Requirements: C#, Haskell, basics of compilers and parsing algorithms.
|
|
||||||
</P>
|
|
||||||
<A NAME="toc8"></A>
|
|
||||||
<H3>GF localization library</H3>
|
|
||||||
<P>
|
|
||||||
This is an idea for a software localization library using GF grammars.
|
|
||||||
The library should replace strings by grammar rules, which can be conceived
|
|
||||||
as very smart templates always guaranteeing grammatically correct output.
|
|
||||||
The library should be based on the
|
|
||||||
<A HREF="http://www.cs.chalmers.se/Cs/Research/Language-technology/GF/lib/resource/doc/synopsis.html">GF Resource Grammar Library</A>, providing infrastructure
|
|
||||||
currently for 12 languages.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
Requirements: GF, some natural languages, some localization platform
|
|
||||||
</P>
|
|
||||||
<A NAME="toc9"></A>
|
|
||||||
<H3>Multilingual grammar applications for mobile phones</H3>
|
|
||||||
<P>
|
|
||||||
GF grammars can be compiled into programs that can be run on different
|
|
||||||
platforms, such as web browsers and mobile phones. An example is a
|
|
||||||
<A HREF="http://www.cs.chalmers.se/Cs/Research/Language-technology/GF/demos/index-numbers.html">numeral translator</A> running on both these platforms.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
The proposed project is rather open: find some cool applications of
|
|
||||||
the technology that are useful or entertaining for mobile phone users. A
|
|
||||||
part of the project is to investigate implementation issues such as making
|
|
||||||
the best use of the phone's resources. Possible applications have
|
|
||||||
something to do with translation; one suggestion is an sms editor/translator.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
Requirements: GF, JavaScript, some phone application development tools
|
|
||||||
</P>
|
|
||||||
<A NAME="toc10"></A>
|
|
||||||
<H3>Multilingual grammar applications for the web</H3>
|
|
||||||
<P>
|
|
||||||
This project is rather open: find some cool applications of
|
|
||||||
the technology that are useful or entertaining on the web. Examples include
|
|
||||||
</P>
|
|
||||||
<UL>
|
|
||||||
<LI>translators: see <A HREF="http://tournesol.cs.chalmers.se:41296/translate">demo</A>
|
|
||||||
<LI>multilingual wikis: see <A HREF="http://csmisc14.cs.chalmers.se/~meza/restWiki/wiki.cgi">demo</A>
|
|
||||||
<LI>fridge magnets: see <A HREF="http://tournesol.cs.chalmers.se:41296/fridge">demo</A>
|
|
||||||
</UL>
|
|
||||||
|
|
||||||
<P>
|
|
||||||
Requirements: GF, JavaScript or Java and Google Web Toolkit, CGI
|
|
||||||
</P>
|
|
||||||
<A NAME="toc11"></A>
|
|
||||||
<H3>GMail gadget for GF</H3>
|
|
||||||
<P>
|
|
||||||
It is possible to add custom gadgets to GMail. If you are going to write
|
|
||||||
e-mail in a foreign language then you probably will need help from
|
|
||||||
a dictionary or you may want to check something in the grammar. GF provides
|
|
||||||
all resources that you may need but you have to think about how to
|
|
||||||
design a gadget that fits well in the GMail environment and what
|
|
||||||
functionality from GF you want to expose.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
Requirements: GF, Google Web Toolkit
|
|
||||||
</P>
|
|
||||||
<A NAME="toc12"></A>
|
|
||||||
<H2>Dissemination and intellectual property</H2>
|
|
||||||
<P>
|
|
||||||
All code suggested here will be released under the LGPL just like
|
|
||||||
the current resource grammars and run-time GF libraries,
|
|
||||||
with the copyright held by respective authors.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
As a rule, the code will be distributed via the GF web site.
|
|
||||||
</P>
|
|
||||||
|
|
||||||
<!-- html code generated by txt2tags 2.4 (http://txt2tags.sf.net) -->
|
|
||||||
<!-- cmdline: txt2tags -\-toc gf-ideas.txt -->
|
|
||||||
</BODY></HTML>
|
|
||||||
@@ -1,231 +0,0 @@
|
|||||||
GF Project Ideas
|
|
||||||
Resource Grammars, Web Applications, etc
|
|
||||||
contact: Aarne Ranta (aarne at chalmers dot se)
|
|
||||||
|
|
||||||
%!Encoding : iso-8859-1
|
|
||||||
|
|
||||||
%!target:html
|
|
||||||
%!postproc(html): #BECE <center>
|
|
||||||
%!postproc(html): #ENCE </center>
|
|
||||||
%!postproc(html): #GRAY <font color="green" size="-1">
|
|
||||||
%!postproc(html): #EGRAY </font>
|
|
||||||
%!postproc(html): #RED <font color="red"><b>
|
|
||||||
%!postproc(html): #YELLOW <font color="orange"><i>
|
|
||||||
%!postproc(html): #ERED </b></font>
|
|
||||||
%!postproc(html): #EYELLOW </i></font>
|
|
||||||
|
|
||||||
#BECE
|
|
||||||
[Logos/gf0.png]
|
|
||||||
#ENCE
|
|
||||||
|
|
||||||
|
|
||||||
==Resource Grammar Implementations==
|
|
||||||
|
|
||||||
GF Resource Grammar Library is an open-source computational grammar resource
|
|
||||||
that currently covers 12 languages.
|
|
||||||
The Library is a collaborative effort to which programmers from many countries
|
|
||||||
have contributed. The next goal is to extend the library
|
|
||||||
to all of the 23 official EU languages. Also other languages
|
|
||||||
are welcome all the time. The following diagram shows the current status of the
|
|
||||||
library. Each of the red and yellow ones is a potential project.
|
|
||||||
|
|
||||||
#BECE
|
|
||||||
[school-langs.png]
|
|
||||||
#ENCE
|
|
||||||
|
|
||||||
|
|
||||||
//red=wanted, green=exists, orange=in-progress, solid=official-eu, dotted=non-eu//
|
|
||||||
|
|
||||||
The linguistic coverage of the library includes the inflectional morphology
|
|
||||||
and basic syntax of each language. It can be used in GF applications
|
|
||||||
and also ported to other formats. It can also be used for building other
|
|
||||||
linguistic resources, such as morphological lexica and parsers.
|
|
||||||
The library is licensed under LGPL.
|
|
||||||
|
|
||||||
|
|
||||||
===Tasks===
|
|
||||||
|
|
||||||
Writing a grammar for a language is usually easier if other languages
|
|
||||||
from the same family already have grammars. The colours have the same
|
|
||||||
meaning as in the diagram above; in addition, we use boldface for the
|
|
||||||
red, still unimplemented languages and italics for the
|
|
||||||
orange languages in progress. Thus, in particular, each of the languages
|
|
||||||
coloured red below is a possible programming project.
|
|
||||||
|
|
||||||
Baltic:
|
|
||||||
- #RED Latvian #ERED
|
|
||||||
- #RED Lithuanian #ERED
|
|
||||||
|
|
||||||
|
|
||||||
Celtic:
|
|
||||||
- #RED Irish #ERED
|
|
||||||
|
|
||||||
|
|
||||||
Fenno-Ugric:
|
|
||||||
- #RED Estonian #ERED
|
|
||||||
- #GRAY Finnish #EGRAY
|
|
||||||
- #RED Hungarian #ERED
|
|
||||||
|
|
||||||
|
|
||||||
Germanic:
|
|
||||||
- #GRAY Danish #EGRAY
|
|
||||||
- #RED Dutch #ERED
|
|
||||||
- #GRAY English #EGRAY
|
|
||||||
- #GRAY German #EGRAY
|
|
||||||
- #GRAY Norwegian #EGRAY
|
|
||||||
- #GRAY Swedish #EGRAY
|
|
||||||
|
|
||||||
|
|
||||||
Hellenic:
|
|
||||||
- #RED Greek #ERED
|
|
||||||
|
|
||||||
|
|
||||||
Indo-Iranian:
|
|
||||||
- #YELLOW Hindi #EYELLOW
|
|
||||||
- #YELLOW Urdu #EYELLOW
|
|
||||||
|
|
||||||
|
|
||||||
Romance:
|
|
||||||
- #GRAY Catalan #EGRAY
|
|
||||||
- #GRAY French #EGRAY
|
|
||||||
- #GRAY Italian #EGRAY
|
|
||||||
- #RED Portuguese #ERED
|
|
||||||
- #YELLOW Romanian #EYELLOW
|
|
||||||
- #GRAY Spanish #EGRAY
|
|
||||||
|
|
||||||
|
|
||||||
Semitic:
|
|
||||||
- #YELLOW Arabic #EYELLOW
|
|
||||||
- #RED Maltese #ERED
|
|
||||||
|
|
||||||
|
|
||||||
Slavonic:
|
|
||||||
- #GRAY Bulgarian #EGRAY
|
|
||||||
- #RED Czech #ERED
|
|
||||||
- #YELLOW Polish #EYELLOW
|
|
||||||
- #GRAY Russian #EGRAY
|
|
||||||
- #RED Slovak #ERED
|
|
||||||
- #RED Slovenian #ERED
|
|
||||||
|
|
||||||
|
|
||||||
Tai:
|
|
||||||
- #YELLOW Thai #EYELLOW
|
|
||||||
|
|
||||||
|
|
||||||
Turkic:
|
|
||||||
- #YELLOW Turkish #EYELLOW
|
|
||||||
|
|
||||||
|
|
||||||
===Who is qualified===
|
|
||||||
|
|
||||||
Writing a resource grammar implementation requires good general programming
|
|
||||||
skills, and a good explicit knowledge of the grammar of the target language.
|
|
||||||
A typical participant could be
|
|
||||||
- native or fluent speaker of the target language
|
|
||||||
- interested in languages on the theoretical level, and preferably familiar
|
|
||||||
with many languages (to be able to think about them on an abstract level)
|
|
||||||
- familiar with functional programming languages such as ML or Haskell
|
|
||||||
(GF itself is a language similar to these)
|
|
||||||
- on Master's or PhD level in linguistics, computer science, or mathematics
|
|
||||||
|
|
||||||
|
|
||||||
But it is the quality of the assignment that is assessed, not any formal
|
|
||||||
requirements. The "typical participant" was described to give an idea of
|
|
||||||
who is likely to succeed in this.
|
|
||||||
|
|
||||||
|
|
||||||
===The Summer School===
|
|
||||||
|
|
||||||
A Summer School on resource grammars and applications will
|
|
||||||
be organized at the campus of Chalmers University of Technology in Gothenburg,
|
|
||||||
Sweden, on 17-28 August 2009. It can be seen as a natural checkpoint in
|
|
||||||
a resource grammar project; the participants are assumed to learn GF before
|
|
||||||
the Summer School, but how far they have come in their projects may vary.
|
|
||||||
|
|
||||||
More information on the Summer School web page:
|
|
||||||
|
|
||||||
[``http://www.cs.chalmers.se/Cs/Research/Language-technology/GF/doc/gf-summerschool.html`` http://www.cs.chalmers.se/Cs/Research/Language-technology/GF/doc/gf-summerschool.html]
|
|
||||||
|
|
||||||
|
|
||||||
==Other project ideas==
|
|
||||||
|
|
||||||
===GF interpreter in Java===
|
|
||||||
|
|
||||||
The idea is to write a run-time system for GF grammars in Java. This enables
|
|
||||||
the use of **embedded grammars** in Java applications. This project is
|
|
||||||
a fresh-up of [earlier work http://www.cs.chalmers.se/~bringert/gf/gf-java.html],
|
|
||||||
now using the new run-time format PGF and addressing a new parsing algorithm.
|
|
||||||
|
|
||||||
Requirements: Java, Haskell, basics of compilers and parsing algorithms.
|
|
||||||
|
|
||||||
|
|
||||||
===GF interpreter in C#===
|
|
||||||
|
|
||||||
The idea is to write a run-time system for GF grammars in C#. This enables
|
|
||||||
the use of **embedded grammars** in C# applications. This project is
|
|
||||||
similar to [earlier work http://www.cs.chalmers.se/~bringert/gf/gf-java.html]
|
|
||||||
on Java, now addressing C# and using the new run-time format PGF.
|
|
||||||
|
|
||||||
Requirements: C#, Haskell, basics of compilers and parsing algorithms.
|
|
||||||
|
|
||||||
|
|
||||||
===GF localization library===
|
|
||||||
|
|
||||||
This is an idea for a software localization library using GF grammars.
|
|
||||||
The library should replace strings by grammar rules, which can be conceived
|
|
||||||
as very smart templates always guaranteeing grammatically correct output.
|
|
||||||
The library should be based on the
|
|
||||||
[GF Resource Grammar Library http://www.cs.chalmers.se/Cs/Research/Language-technology/GF/lib/resource/doc/synopsis.html], providing infrastructure
|
|
||||||
currently for 12 languages.
|
|
||||||
|
|
||||||
Requirements: GF, some natural languages, some localization platform
|
|
||||||
|
|
||||||
|
|
||||||
===Multilingual grammar applications for mobile phones===
|
|
||||||
|
|
||||||
GF grammars can be compiled into programs that can be run on different
|
|
||||||
platforms, such as web browsers and mobile phones. An example is a
|
|
||||||
[numeral translator http://www.cs.chalmers.se/Cs/Research/Language-technology/GF/demos/index-numbers.html] running on both these platforms.
|
|
||||||
|
|
||||||
The proposed project is rather open: find some cool applications of
|
|
||||||
the technology that are useful or entertaining for mobile phone users. A
|
|
||||||
part of the project is to investigate implementation issues such as making
|
|
||||||
the best use of the phone's resources. Possible applications have
|
|
||||||
something to do with translation; one suggestion is an SMS editor/translator.
|
|
||||||
|
|
||||||
Requirements: GF, JavaScript, some phone application development tools
|
|
||||||
|
|
||||||
|
|
||||||
===Multilingual grammar applications for the web===
|
|
||||||
|
|
||||||
This project is rather open: find some cool applications of
|
|
||||||
the technology that are useful or entertaining on the web. Examples include
|
|
||||||
- translators: see [demo http://129.16.250.57:41296/translate]
|
|
||||||
- multilingual wikis: see [demo http://csmisc14.cs.chalmers.se/~meza/restWiki/wiki.cgi]
|
|
||||||
- fridge magnets: see [demo http://129.16.250.57:41296/fridge]
|
|
||||||
|
|
||||||
|
|
||||||
Requirements: GF, JavaScript or Java and Google Web Toolkit, CGI
|
|
||||||
|
|
||||||
|
|
||||||
===GMail gadget for GF===
|
|
||||||
|
|
||||||
It is possible to add custom gadgets to GMail. If you are going to write
|
|
||||||
e-mail in a foreign language then you probably will need help from
|
|
||||||
a dictionary or you may want to check something in the grammar. GF provides
|
|
||||||
all resources that you may need but you have to think about how to
|
|
||||||
design a gadget that fits well in the GMail environment and what
|
|
||||||
functionality from GF you want to expose.
|
|
||||||
|
|
||||||
Requirements: GF, Google Web Toolkit
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
==Dissemination and intellectual property==
|
|
||||||
|
|
||||||
All code suggested here will be released under the LGPL just like
|
|
||||||
the current resource grammars and run-time GF libraries,
|
|
||||||
with the copyright held by respective authors.
|
|
||||||
|
|
||||||
As a rule, the code will be distributed via the GF web site.
|
|
||||||
|
|
||||||
@@ -1,289 +0,0 @@
|
|||||||
(Adapted from KeY statistics by Vladimir Klebanov)
|
|
||||||
|
|
||||||
This is GF right now:
|
|
||||||
|
|
||||||
Total Physical Source Lines of Code (SLOC) = 42,467
|
|
||||||
|
|
||||||
Development Effort Estimate, Person-Years (Person-Months) = 10.24 (122.932)
|
|
||||||
(Basic COCOMO model, Person-Months = 2.4 * (KSLOC**1.05))
|
|
||||||
|
|
||||||
Schedule Estimate, Years (Months) = 1.30 (15.56)
|
|
||||||
(Basic COCOMO model, Months = 2.5 * (person-months**0.38))
|
|
||||||
|
|
||||||
Estimated Average Number of Developers (Effort/Schedule) = 7.90
|
|
||||||
|
|
||||||
Total Estimated Cost to Develop = $ 1,383,870
|
|
||||||
(average salary = $56,286/year, overhead = 2.40).
|
|
||||||
|
|
||||||
SLOCCount, Copyright (C) 2001-2004 David A. Wheeler
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
----------- basis of counting: Haskell code + BNFC code - generated Happy parsers
|
|
||||||
|
|
||||||
-- GF/src% wc -l *.hs GF/*.hs GF/*/*.hs GF/*/*/*.hs GF/*/*.cf JavaGUI/*.java
|
|
||||||
-- date Fri Jun 3 10:00:31 CEST 2005
|
|
||||||
|
|
||||||
104 GF.hs
|
|
||||||
402 GF/API.hs
|
|
||||||
98 GF/GFModes.hs
|
|
||||||
379 GF/Shell.hs
|
|
||||||
4 GF/Today.hs
|
|
||||||
43 GF/API/BatchTranslate.hs
|
|
||||||
145 GF/API/GrammarToHaskell.hs
|
|
||||||
77 GF/API/IOGrammar.hs
|
|
||||||
25 GF/API/MyParser.hs
|
|
||||||
177 GF/Canon/AbsGFC.hs
|
|
||||||
37 GF/Canon/ByLine.hs
|
|
||||||
192 GF/Canon/CanonToGrammar.hs
|
|
||||||
293 GF/Canon/CMacros.hs
|
|
||||||
79 GF/Canon/GetGFC.hs
|
|
||||||
86 GF/Canon/GFC.hs
|
|
||||||
291 GF/Canon/LexGFC.hs
|
|
||||||
201 GF/Canon/Look.hs
|
|
||||||
235 GF/Canon/MkGFC.hs
|
|
||||||
46 GF/Canon/PrExp.hs
|
|
||||||
352 GF/Canon/PrintGFC.hs
|
|
||||||
147 GF/Canon/Share.hs
|
|
||||||
207 GF/Canon/SkelGFC.hs
|
|
||||||
46 GF/Canon/TestGFC.hs
|
|
||||||
49 GF/Canon/Unlex.hs
|
|
||||||
202 GF/CF/CanonToCF.hs
|
|
||||||
213 GF/CF/CF.hs
|
|
||||||
217 GF/CF/CFIdent.hs
|
|
||||||
62 GF/CF/CFtoGrammar.hs
|
|
||||||
47 GF/CF/CFtoSRG.hs
|
|
||||||
206 GF/CF/ChartParser.hs
|
|
||||||
191 GF/CF/EBNF.hs
|
|
||||||
45 GF/CFGM/AbsCFG.hs
|
|
||||||
312 GF/CFGM/LexCFG.hs
|
|
||||||
157 GF/CFGM/PrintCFG.hs
|
|
||||||
109 GF/CFGM/PrintCFGrammar.hs
|
|
||||||
85 GF/CF/PPrCF.hs
|
|
||||||
150 GF/CF/PrLBNF.hs
|
|
||||||
106 GF/CF/Profile.hs
|
|
||||||
141 GF/Compile/BackOpt.hs
|
|
||||||
763 GF/Compile/CheckGrammar.hs
|
|
||||||
337 GF/Compile/Compile.hs
|
|
||||||
136 GF/Compile/Extend.hs
|
|
||||||
124 GF/Compile/GetGrammar.hs
|
|
||||||
282 GF/Compile/GrammarToCanon.hs
|
|
||||||
93 GF/Compile/MkConcrete.hs
|
|
||||||
128 GF/Compile/MkResource.hs
|
|
||||||
83 GF/Compile/MkUnion.hs
|
|
||||||
146 GF/Compile/ModDeps.hs
|
|
||||||
294 GF/Compile/NewRename.hs
|
|
||||||
227 GF/Compile/Optimize.hs
|
|
||||||
76 GF/Compile/PGrammar.hs
|
|
||||||
84 GF/Compile/PrOld.hs
|
|
||||||
119 GF/Compile/Rebuild.hs
|
|
||||||
63 GF/Compile/RemoveLiT.hs
|
|
||||||
274 GF/Compile/Rename.hs
|
|
||||||
535 GF/Compile/ShellState.hs
|
|
||||||
135 GF/Compile/Update.hs
|
|
||||||
129 GF/Conversion/GFC.hs
|
|
||||||
149 GF/Conversion/GFCtoSimple.hs
|
|
||||||
53 GF/Conversion/MCFGtoCFG.hs
|
|
||||||
46 GF/Conversion/RemoveEpsilon.hs
|
|
||||||
102 GF/Conversion/RemoveErasing.hs
|
|
||||||
82 GF/Conversion/RemoveSingletons.hs
|
|
||||||
137 GF/Conversion/SimpleToFinite.hs
|
|
||||||
26 GF/Conversion/SimpleToMCFG.hs
|
|
||||||
230 GF/Conversion/Types.hs
|
|
||||||
143 GF/Data/Assoc.hs
|
|
||||||
118 GF/Data/BacktrackM.hs
|
|
||||||
20 GF/Data/ErrM.hs
|
|
||||||
119 GF/Data/GeneralDeduction.hs
|
|
||||||
30 GF/Data/Glue.hs
|
|
||||||
67 GF/Data/IncrementalDeduction.hs
|
|
||||||
61 GF/Data/Map.hs
|
|
||||||
662 GF/Data/Operations.hs
|
|
||||||
127 GF/Data/OrdMap2.hs
|
|
||||||
120 GF/Data/OrdSet.hs
|
|
||||||
193 GF/Data/Parsers.hs
|
|
||||||
64 GF/Data/RedBlack.hs
|
|
||||||
150 GF/Data/RedBlackSet.hs
|
|
||||||
19 GF/Data/SharedString.hs
|
|
||||||
127 GF/Data/SortedList.hs
|
|
||||||
134 GF/Data/Str.hs
|
|
||||||
120 GF/Data/Trie2.hs
|
|
||||||
129 GF/Data/Trie.hs
|
|
||||||
71 GF/Data/Utilities.hs
|
|
||||||
243 GF/Data/Zipper.hs
|
|
||||||
78 GF/Embed/EmbedAPI.hs
|
|
||||||
113 GF/Embed/EmbedCustom.hs
|
|
||||||
137 GF/Embed/EmbedParsing.hs
|
|
||||||
50 GF/Formalism/CFG.hs
|
|
||||||
51 GF/Formalism/GCFG.hs
|
|
||||||
58 GF/Formalism/MCFG.hs
|
|
||||||
246 GF/Formalism/SimpleGFC.hs
|
|
||||||
349 GF/Formalism/Utilities.hs
|
|
||||||
30 GF/Fudgets/ArchEdit.hs
|
|
||||||
134 GF/Fudgets/CommandF.hs
|
|
||||||
51 GF/Fudgets/EventF.hs
|
|
||||||
59 GF/Fudgets/FudgetOps.hs
|
|
||||||
37 GF/Fudgets/UnicodeF.hs
|
|
||||||
86 GF/Grammar/AbsCompute.hs
|
|
||||||
38 GF/Grammar/Abstract.hs
|
|
||||||
149 GF/Grammar/AppPredefined.hs
|
|
||||||
312 GF/Grammar/Compute.hs
|
|
||||||
215 GF/Grammar/Grammar.hs
|
|
||||||
46 GF/Grammar/Lockfield.hs
|
|
||||||
189 GF/Grammar/LookAbs.hs
|
|
||||||
182 GF/Grammar/Lookup.hs
|
|
||||||
745 GF/Grammar/Macros.hs
|
|
||||||
340 GF/Grammar/MMacros.hs
|
|
||||||
115 GF/Grammar/PatternMatch.hs
|
|
||||||
279 GF/Grammar/PrGrammar.hs
|
|
||||||
121 GF/Grammar/Refresh.hs
|
|
||||||
44 GF/Grammar/ReservedWords.hs
|
|
||||||
251 GF/Grammar/TC.hs
|
|
||||||
301 GF/Grammar/TypeCheck.hs
|
|
||||||
96 GF/Grammar/Unify.hs
|
|
||||||
101 GF/Grammar/Values.hs
|
|
||||||
89 GF/Infra/CheckM.hs
|
|
||||||
43 GF/Infra/Comments.hs
|
|
||||||
152 GF/Infra/Ident.hs
|
|
||||||
390 GF/Infra/Modules.hs
|
|
||||||
358 GF/Infra/Option.hs
|
|
||||||
179 GF/Infra/Print.hs
|
|
||||||
331 GF/Infra/ReadFiles.hs
|
|
||||||
337 GF/Infra/UseIO.hs
|
|
||||||
153 GF/OldParsing/CFGrammar.hs
|
|
||||||
283 GF/OldParsing/ConvertFiniteGFC.hs
|
|
||||||
121 GF/OldParsing/ConvertFiniteSimple.hs
|
|
||||||
34 GF/OldParsing/ConvertGFCtoMCFG.hs
|
|
||||||
122 GF/OldParsing/ConvertGFCtoSimple.hs
|
|
||||||
44 GF/OldParsing/ConvertGrammar.hs
|
|
||||||
52 GF/OldParsing/ConvertMCFGtoCFG.hs
|
|
||||||
30 GF/OldParsing/ConvertSimpleToMCFG.hs
|
|
||||||
43 GF/OldParsing/GCFG.hs
|
|
||||||
86 GF/OldParsing/GeneralChart.hs
|
|
||||||
148 GF/OldParsing/GrammarTypes.hs
|
|
||||||
50 GF/OldParsing/IncrementalChart.hs
|
|
||||||
206 GF/OldParsing/MCFGrammar.hs
|
|
||||||
43 GF/OldParsing/ParseCFG.hs
|
|
||||||
82 GF/OldParsing/ParseCF.hs
|
|
||||||
177 GF/OldParsing/ParseGFC.hs
|
|
||||||
37 GF/OldParsing/ParseMCFG.hs
|
|
||||||
161 GF/OldParsing/SimpleGFC.hs
|
|
||||||
188 GF/OldParsing/Utilities.hs
|
|
||||||
51 GF/Parsing/CFG.hs
|
|
||||||
66 GF/Parsing/CF.hs
|
|
||||||
151 GF/Parsing/GFC.hs
|
|
||||||
64 GF/Parsing/MCFG.hs
|
|
||||||
83 GF/Printing/PrintParser.hs
|
|
||||||
127 GF/Printing/PrintSimplifiedTerm.hs
|
|
||||||
190 GF/Shell/CommandL.hs
|
|
||||||
556 GF/Shell/Commands.hs
|
|
||||||
524 GF/Shell/HelpFile.hs
|
|
||||||
79 GF/Shell/JGF.hs
|
|
||||||
171 GF/Shell/PShell.hs
|
|
||||||
221 GF/Shell/ShellCommands.hs
|
|
||||||
66 GF/Shell/SubShell.hs
|
|
||||||
87 GF/Shell/TeachYourself.hs
|
|
||||||
296 GF/Source/AbsGF.hs
|
|
||||||
229 GF/Source/GrammarToSource.hs
|
|
||||||
312 GF/Source/LexGF.hs
|
|
||||||
528 GF/Source/PrintGF.hs
|
|
||||||
353 GF/Source/SkelGF.hs
|
|
||||||
657 GF/Source/SourceToGrammar.hs
|
|
||||||
58 GF/Source/TestGF.hs
|
|
||||||
72 GF/Speech/PrGSL.hs
|
|
||||||
65 GF/Speech/PrJSGF.hs
|
|
||||||
128 GF/Speech/SRG.hs
|
|
||||||
103 GF/Speech/TransformCFG.hs
|
|
||||||
30 GF/System/ArchEdit.hs
|
|
||||||
90 GF/System/Arch.hs
|
|
||||||
27 GF/System/NoReadline.hs
|
|
||||||
27 GF/System/Readline.hs
|
|
||||||
73 GF/System/Tracing.hs
|
|
||||||
25 GF/System/UseReadline.hs
|
|
||||||
63 GF/Text/Arabic.hs
|
|
||||||
97 GF/Text/Devanagari.hs
|
|
||||||
72 GF/Text/Ethiopic.hs
|
|
||||||
99 GF/Text/ExtendedArabic.hs
|
|
||||||
37 GF/Text/ExtraDiacritics.hs
|
|
||||||
172 GF/Text/Greek.hs
|
|
||||||
53 GF/Text/Hebrew.hs
|
|
||||||
95 GF/Text/Hiragana.hs
|
|
||||||
69 GF/Text/LatinASupplement.hs
|
|
||||||
47 GF/Text/OCSCyrillic.hs
|
|
||||||
45 GF/Text/Russian.hs
|
|
||||||
77 GF/Text/Tamil.hs
|
|
||||||
125 GF/Text/Text.hs
|
|
||||||
69 GF/Text/Unicode.hs
|
|
||||||
47 GF/Text/UTF8.hs
|
|
||||||
56 GF/Translate/GFT.hs
|
|
||||||
427 GF/UseGrammar/Custom.hs
|
|
||||||
435 GF/UseGrammar/Editing.hs
|
|
||||||
180 GF/UseGrammar/Generate.hs
|
|
||||||
71 GF/UseGrammar/GetTree.hs
|
|
||||||
143 GF/UseGrammar/Information.hs
|
|
||||||
228 GF/UseGrammar/Linear.hs
|
|
||||||
130 GF/UseGrammar/Morphology.hs
|
|
||||||
70 GF/UseGrammar/Paraphrases.hs
|
|
||||||
157 GF/UseGrammar/Parsing.hs
|
|
||||||
66 GF/UseGrammar/Randomized.hs
|
|
||||||
170 GF/UseGrammar/Session.hs
|
|
||||||
186 GF/UseGrammar/Tokenize.hs
|
|
||||||
43 GF/UseGrammar/Transfer.hs
|
|
||||||
122 GF/Visualization/NewVisualizationGrammar.hs
|
|
||||||
123 GF/Visualization/VisualizeGrammar.hs
|
|
||||||
63 GF/Conversion/SimpleToMCFG/Coercions.hs
|
|
||||||
256 GF/Conversion/SimpleToMCFG/Nondet.hs
|
|
||||||
129 GF/Conversion/SimpleToMCFG/Strict.hs
|
|
||||||
71 GF/OldParsing/ConvertGFCtoMCFG/Coercions.hs
|
|
||||||
281 GF/OldParsing/ConvertGFCtoMCFG/Nondet.hs
|
|
||||||
277 GF/OldParsing/ConvertGFCtoMCFG/Old.hs
|
|
||||||
189 GF/OldParsing/ConvertGFCtoMCFG/Strict.hs
|
|
||||||
70 GF/OldParsing/ConvertSimpleToMCFG/Coercions.hs
|
|
||||||
245 GF/OldParsing/ConvertSimpleToMCFG/Nondet.hs
|
|
||||||
277 GF/OldParsing/ConvertSimpleToMCFG/Old.hs
|
|
||||||
139 GF/OldParsing/ConvertSimpleToMCFG/Strict.hs
|
|
||||||
83 GF/OldParsing/ParseCFG/General.hs
|
|
||||||
142 GF/OldParsing/ParseCFG/Incremental.hs
|
|
||||||
156 GF/OldParsing/ParseMCFG/Basic.hs
|
|
||||||
103 GF/Parsing/CFG/General.hs
|
|
||||||
150 GF/Parsing/CFG/Incremental.hs
|
|
||||||
98 GF/Parsing/CFG/PInfo.hs
|
|
||||||
226 GF/Parsing/MCFG/Active2.hs
|
|
||||||
304 GF/Parsing/MCFG/Active.hs
|
|
||||||
144 GF/Parsing/MCFG/Incremental2.hs
|
|
||||||
163 GF/Parsing/MCFG/Incremental.hs
|
|
||||||
128 GF/Parsing/MCFG/Naive.hs
|
|
||||||
163 GF/Parsing/MCFG/PInfo.hs
|
|
||||||
194 GF/Parsing/MCFG/Range.hs
|
|
||||||
183 GF/Parsing/MCFG/ViaCFG.hs
|
|
||||||
167 GF/Canon/GFC.cf
|
|
||||||
36 GF/CFGM/CFG.cf
|
|
||||||
321 GF/Source/GF.cf
|
|
||||||
272 JavaGUI/DynamicTree2.java
|
|
||||||
272 JavaGUI/DynamicTree.java
|
|
||||||
2357 JavaGUI/GFEditor2.java
|
|
||||||
1420 JavaGUI/GFEditor.java
|
|
||||||
30 JavaGUI/GrammarFilter.java
|
|
||||||
13 JavaGUI/LinPosition.java
|
|
||||||
18 JavaGUI/MarkedArea.java
|
|
||||||
1552 JavaGUI/Numerals.java
|
|
||||||
22 JavaGUI/Utils.java
|
|
||||||
5956 total
|
|
||||||
48713 total
|
|
||||||
|
|
||||||
- 2131 GF/Canon/ParGFC.hs
|
|
||||||
3336 GF/Source/ParGF.hs
|
|
||||||
779 GF/CFGM/ParCFG.hs
|
|
||||||
|
|
||||||
42467 total
|
|
||||||
|
|
||||||
--------
|
|
||||||
|
|
||||||
sloccount sloc =
|
|
||||||
let
|
|
||||||
ksloc = sloc / 1000
|
|
||||||
effort = 2.4 * (ksloc ** 1.05)
|
|
||||||
schedule = 2.5 * (effort ** 0.38)
|
|
||||||
develops = effort / schedule
|
|
||||||
cost = 56286 * (effort/12) * 2.4
|
|
||||||
in
|
|
||||||
[sloc,ksloc,effort,effort/12,schedule,schedule/12,develops,cost]
|
|
||||||
@@ -1,533 +0,0 @@
|
|||||||
GF Resource Grammar Summer School
|
|
||||||
Gothenburg, 17-28 August 2009
|
|
||||||
Aarne Ranta (aarne at chalmers.se)
|
|
||||||
|
|
||||||
%!Encoding : iso-8859-1
|
|
||||||
|
|
||||||
%!target:html
|
|
||||||
%!postproc(html): #BECE <center>
|
|
||||||
%!postproc(html): #ENCE </center>
|
|
||||||
%!postproc(html): #GRAY <font color="green" size="-1">
|
|
||||||
%!postproc(html): #EGRAY </font>
|
|
||||||
%!postproc(html): #RED <font color="red">
|
|
||||||
%!postproc(html): #YELLOW <font color="orange">
|
|
||||||
%!postproc(html): #ERED </font>
|
|
||||||
|
|
||||||
#BECE
|
|
||||||
[school-langs.png]
|
|
||||||
#ENCE
|
|
||||||
|
|
||||||
|
|
||||||
//red=wanted, green=exists, orange=in-progress, solid=official-eu, dotted=non-eu//
|
|
||||||
|
|
||||||
|
|
||||||
==News==
|
|
||||||
|
|
||||||
An on-line course //GF for Resource Grammar Writers// will start on
|
|
||||||
Monday 20 April at 15.30 CEST. The slides and recordings of the five
|
|
||||||
45-minute lectures will be made available via this web page. If requested,
|
|
||||||
the course may be repeated in the beginning of the summer school.
|
|
||||||
|
|
||||||
|
|
||||||
==Executive summary==
|
|
||||||
|
|
||||||
GF Resource Grammar Library is an open-source computational grammar resource
|
|
||||||
that currently covers 12 languages.
|
|
||||||
The Summer School is a part of a collaborative effort to extend the library
|
|
||||||
to all of the 23 official EU languages. Also other languages
|
|
||||||
chosen by the participants are welcome.
|
|
||||||
|
|
||||||
The missing EU languages are:
|
|
||||||
Czech, Dutch, Estonian, Greek, Hungarian, Irish, Latvian, Lithuanian,
|
|
||||||
Maltese, Portuguese, Slovak, and Slovenian. There is also more work to
|
|
||||||
be done on Polish and Romanian.
|
|
||||||
|
|
||||||
The linguistic coverage of the library includes the inflectional morphology
|
|
||||||
and basic syntax of each language. It can be used in GF applications
|
|
||||||
and also ported to other formats. It can also be used for building other
|
|
||||||
linguistic resources, such as morphological lexica and parsers.
|
|
||||||
The library is licensed under LGPL.
|
|
||||||
|
|
||||||
In the summer school, each language will be implemented by one or two students
|
|
||||||
working together. A morphology implementation will be credited
|
|
||||||
as a Chalmers course worth 7.5 ECTS points; adding a syntax implementation
|
|
||||||
will be worth more. The estimated total work load is 1-2 months for the
|
|
||||||
morphology, and 3-6 months for the whole grammar.
|
|
||||||
|
|
||||||
Participation in the course is free. Registration is done via the course's
|
|
||||||
Google group, [``groups.google.com/group/gf-resource-school-2009/`` http://groups.google.com/group/gf-resource-school-2009/]. The registration deadline is 15 June 2009.
|
|
||||||
|
|
||||||
Some travel grants will be available. They are distributed on the basis of a
|
|
||||||
GF programming contest in April and May.
|
|
||||||
|
|
||||||
The summer school will be held on 17-28 August 2009, at the campus of
|
|
||||||
Chalmers University of Technology in Gothenburg, Sweden.
|
|
||||||
|
|
||||||
|
|
||||||
[align6.png]
|
|
||||||
|
|
||||||
//Word alignment produced by GF from the resource grammar in Bulgarian, English, Italian, German, Finnish, French, and Swedish.//
|
|
||||||
|
|
||||||
==Introduction==
|
|
||||||
|
|
||||||
Since 2007, EU-27 has 23 official languages, listed in the diagram on top of this
|
|
||||||
document. There is a growing need for linguistic resources for these
|
|
||||||
languages, to help in tasks such as translation and information retrieval.
|
|
||||||
These resources should be **portable** and **freely accessible**.
|
|
||||||
Languages marked in red in the diagram are of particular interest for
|
|
||||||
the summer school, since they are those on which the effort will be concentrated.
|
|
||||||
|
|
||||||
GF (Grammatical Framework,
|
|
||||||
[``digitalgrammars.com/gf`` http://digitalgrammars.com/gf])
|
|
||||||
is a **functional programming language** designed for writing natural
|
|
||||||
language grammars. It provides an efficient platform for this task, due to
|
|
||||||
its modern characteristics:
|
|
||||||
- It is a functional programming language, similar to Haskell and ML.
|
|
||||||
- It has a static type system and type checker.
|
|
||||||
- It has a powerful module system supporting separate compilation
|
|
||||||
and data abstraction.
|
|
||||||
- It has an optimizing compiler to **Portable Grammar Format** (PGF).
|
|
||||||
- PGF can be further compiled to other formats, such as JavaScript and
|
|
||||||
speech recognition language models.
|
|
||||||
- GF has a **resource grammar library** giving access to the morphology and
|
|
||||||
basic syntax of 12 languages.
|
|
||||||
|
|
||||||
|
|
||||||
In addition to "ordinary" grammars for single languages, GF
|
|
||||||
supports **multilingual grammars**. A multilingual GF grammar consists of an
|
|
||||||
**abstract syntax** and a set of **concrete syntaxes**.
|
|
||||||
An abstract syntax is a system of **trees**, serving as a semantic
|
|
||||||
model or an ontology. A concrete syntax is a mapping from abstract syntax
|
|
||||||
trees to strings of a particular language.
|
|
||||||
|
|
||||||
These mappings defined in concrete syntax are **reversible**: they
|
|
||||||
can be used both for **generating** strings from trees, and for
|
|
||||||
**parsing** strings into trees. Combinations of generation and
|
|
||||||
parsing can be used for **translation**, where the abstract
|
|
||||||
syntax works as an **interlingua**. Thus GF has been used as a
|
|
||||||
framework for building translation systems in several areas
|
|
||||||
of application and large sets of languages.
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
==The GF resource grammar library==
|
|
||||||
|
|
||||||
The GF resource grammar library is a set of grammars usable as libraries when
|
|
||||||
building translation systems and other applications.
|
|
||||||
The library currently covers
|
|
||||||
the 9 languages coloured in green in the diagram above; in addition,
|
|
||||||
Catalan, Norwegian, and Russian are covered, and there is ongoing work on
|
|
||||||
Arabic, Hindi/Urdu, Polish, Romanian, and Thai.
|
|
||||||
|
|
||||||
The purpose of the resource grammar library is to define the "low-level" structure
|
|
||||||
of a language: inflection, word order, agreement. This structure belongs to what
|
|
||||||
linguists call morphology and syntax. It can be very complex and requires
|
|
||||||
a lot of knowledge. Yet, when translating from one language to
|
|
||||||
another, knowing morphology and syntax is but a part of what is needed.
|
|
||||||
The translator (whether human
|
|
||||||
or machine) must understand the meaning of what is translated, and must also know
|
|
||||||
the idiomatic way to express the meaning in the target language. This knowledge
|
|
||||||
can be very domain-dependent and requires in general an expert in the field to
|
|
||||||
reach high quality: a mathematician in the field of mathematics, a meteorologist
|
|
||||||
in the field of weather reports, etc.
|
|
||||||
|
|
||||||
The problem is to find a person who is an expert in both the domain of translation
|
|
||||||
and in the low-level linguistic details. It is the rareness of this combination
|
|
||||||
that has made it difficult to build interlingua-based translation systems.
|
|
||||||
The GF resource grammar library has the mission of helping in this task.
|
|
||||||
It encapsulates the low-level linguistics in program modules
|
|
||||||
accessed through easy-to-use interfaces.
|
|
||||||
Experts on different domains can build translation systems by using the library,
|
|
||||||
without knowing low-level linguistics. The idea is much the same as when a
|
|
||||||
programmer builds a graphical user interface (GUI) from high-level elements such as
|
|
||||||
buttons and menus, without having to care about pixels or geometrical forms.
|
|
||||||
|
|
||||||
|
|
||||||
===Missing EU languages, by the family===
|
|
||||||
|
|
||||||
Writing a grammar for a language is usually easier if other languages
|
|
||||||
from the same family already have grammars. The colours have the same
|
|
||||||
meaning as in the diagram above.
|
|
||||||
|
|
||||||
Baltic:
|
|
||||||
#RED Latvian #ERED
|
|
||||||
#RED Lithuanian #ERED
|
|
||||||
|
|
||||||
Celtic:
|
|
||||||
#RED Irish #ERED
|
|
||||||
|
|
||||||
Fenno-Ugric:
|
|
||||||
#RED Estonian #ERED
|
|
||||||
#GRAY Finnish #EGRAY
|
|
||||||
#RED Hungarian #ERED
|
|
||||||
|
|
||||||
Germanic:
|
|
||||||
#GRAY Danish #EGRAY
|
|
||||||
#RED Dutch #ERED
|
|
||||||
#GRAY English #EGRAY
|
|
||||||
#GRAY German #EGRAY
|
|
||||||
#GRAY Swedish #EGRAY
|
|
||||||
|
|
||||||
Hellenic:
|
|
||||||
#RED Greek #ERED
|
|
||||||
|
|
||||||
Romance:
|
|
||||||
#GRAY French #EGRAY
|
|
||||||
#GRAY Italian #EGRAY
|
|
||||||
#RED Portuguese #ERED
|
|
||||||
#YELLOW Romanian #ERED
|
|
||||||
#GRAY Spanish #EGRAY
|
|
||||||
|
|
||||||
Semitic:
|
|
||||||
#RED Maltese #ERED
|
|
||||||
|
|
||||||
Slavonic:
|
|
||||||
#GRAY Bulgarian #EGRAY
|
|
||||||
#RED Czech #ERED
|
|
||||||
#YELLOW Polish #ERED
|
|
||||||
#RED Slovak #ERED
|
|
||||||
#RED Slovenian #ERED
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
===Applications of the library===
|
|
||||||
|
|
||||||
In addition to translation, the library is also useful in **localization**,
|
|
||||||
that is, porting a piece of software to new languages.
|
|
||||||
The GF resource grammar library has been used in three major projects that need
|
|
||||||
interlingua-based translation or localization of systems to new languages:
|
|
||||||
- in KeY,
|
|
||||||
[``http://www.key-project.org/`` http://www.key-project.org/],
|
|
||||||
for writing formal and informal software specifications (3 languages)
|
|
||||||
- in WebALT,
|
|
||||||
[``http://webalt.math.helsinki.fi/content/index_eng.html`` http://webalt.math.helsinki.fi/content/index_eng.html],
|
|
||||||
for translating mathematical exercises to 7 languages
|
|
||||||
- in TALK [``http://www.talk-project.org`` http://www.talk-project.org],
|
|
||||||
where the library was used for localizing spoken dialogue systems
|
|
||||||
to six languages
|
|
||||||
|
|
||||||
|
|
||||||
The library is also a generic **linguistic resource**,
|
|
||||||
which can be used for tasks
|
|
||||||
such as language teaching and information retrieval. The liberal license (LGPL)
|
|
||||||
makes it usable for anyone and for any task. GF also has tools supporting the
|
|
||||||
use of grammars in programs written in other
|
|
||||||
programming languages: C, C++, Haskell,
|
|
||||||
Java, JavaScript, and Prolog. In connection with the TALK project,
|
|
||||||
support has also been
|
|
||||||
developed for translating GF grammars to language models used in speech
|
|
||||||
recognition (GSL/Nuance, HTK/ATK, SRGS, JSGF).
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
===The structure of the library===
|
|
||||||
|
|
||||||
The library has the following main parts:
|
|
||||||
- **Inflection paradigms**, covering the inflection of each language.
|
|
||||||
- **Core Syntax**, covering a large set of syntax rules that
|
|
||||||
can be implemented for all languages involved.
|
|
||||||
- **Common Test Lexicon**, giving ca. 500 common words that can be used for
|
|
||||||
testing the library.
|
|
||||||
- **Language-Specific Syntax Extensions**, covering syntax rules that are
|
|
||||||
not implementable for all languages.
|
|
||||||
- **Language-Specific Lexica**, word lists for each language, with
|
|
||||||
accurate morphological and syntactic information.
|
|
||||||
|
|
||||||
|
|
||||||
The goal of the summer school is to implement, for each language, at least
|
|
||||||
the first three components. The latter two are more open-ended in character.
|
|
||||||
|
|
||||||
|
|
||||||
==The summer school==
|
|
||||||
|
|
||||||
The goal of the summer school is to extend the GF resource grammar library
|
|
||||||
to cover all 23 EU languages, which means we need 15 new languages.
|
|
||||||
We also welcome other languages than these 23,
|
|
||||||
if there are interested participants.
|
|
||||||
|
|
||||||
The amount of work and skill is between a Master's thesis and a PhD thesis.
|
|
||||||
The Russian implementation was made by Janna Khegai as a part of her
|
|
||||||
PhD thesis; the thesis contains other material, too.
|
|
||||||
The Arabic implementation was started by Ali El Dada in his Master's thesis,
|
|
||||||
but the thesis does not cover the whole API. The realistic amount of work is
|
|
||||||
somewhere between 3 and 8 person months,
|
|
||||||
but this is very much language-dependent.
|
|
||||||
Dutch, for instance, can profit from previous implementations of German and
|
|
||||||
Scandinavian languages, and will probably require less work.
|
|
||||||
Latvian and Lithuanian are the first languages of the Baltic family and
|
|
||||||
will probably require more work.
|
|
||||||
|
|
||||||
In any case, the proposed allocation of work power is 2 participants per
|
|
||||||
language. They will do 1 month's worth of homework, followed
|
|
||||||
by 2 weeks of summer school, followed by 4 months' work at home.
|
|
||||||
Who are these participants?
|
|
||||||
|
|
||||||
|
|
||||||
===Selecting participants===
|
|
||||||
|
|
||||||
Persons interested in participating in the Summer School should sign up in
|
|
||||||
the **Google Group** of the course,
|
|
||||||
|
|
||||||
[``groups.google.com/group/gf-resource-school-2009/`` http://groups.google.com/group/gf-resource-school-2009/]
|
|
||||||
|
|
||||||
The registration deadline is 15 June 2009.
|
|
||||||
|
|
||||||
Notice: you can sign up in the Google
|
|
||||||
group even if you are not planning to attend the summer school, but are
|
|
||||||
just interested in the topic. There will be a separate registration to the
|
|
||||||
school itself later.
|
|
||||||
|
|
||||||
The participants are recommended to learn GF in advance, by self-study from the
|
|
||||||
[tutorial http://digitalgrammars.com/gf/doc/gf-tutorial.html].
|
|
||||||
This should take a couple of weeks. An **on-line course** will be
|
|
||||||
arranged on 20-29 April to help in getting started with GF.
|
|
||||||
|
|
||||||
At the end of the on-line course, a **programming assignment** will be published.
|
|
||||||
This assignment will test skills required in resource grammar programming.
|
|
||||||
Work on the assignment will take a couple of weeks.
|
|
||||||
Those who are interested in getting a travel grant will submit
|
|
||||||
their sample resource grammar fragment
|
|
||||||
to the Summer School Committee by 12 May.
|
|
||||||
The Committee then decides who is given a travel grant of up to 1000 EUR.
|
|
||||||
|
|
||||||
Notice: you can participate in the summer school without following the on-line
|
|
||||||
course or participating in the contest. These things are required only if you
|
|
||||||
want a travel grant. If requested by sufficiently many participants, the lectures of
|
|
||||||
the on-line course will be repeated in the beginning of the summer school.
|
|
||||||
|
|
||||||
The summer school itself is devoted to working on resource grammars.
|
|
||||||
In addition to grammar writing itself, testing and evaluation are
|
|
||||||
performed. One way to do this is via adding new languages
|
|
||||||
to resource grammar applications - in particular, to the WebALT mathematical
|
|
||||||
exercise translator.
|
|
||||||
|
|
||||||
The resource grammars are expected to be completed by December 2009. They will
|
|
||||||
be published on the GF website and licensed under LGPL.
|
|
||||||
|
|
||||||
The participants are encouraged to contact each other and even work in groups.
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
===Who is qualified===
|
|
||||||
|
|
||||||
Writing a resource grammar implementation requires good general programming
|
|
||||||
skills, and a good explicit knowledge of the grammar of the target language.
|
|
||||||
A typical participant could be
|
|
||||||
- native or fluent speaker of the target language
|
|
||||||
- interested in languages on the theoretical level, and preferably familiar
|
|
||||||
with many languages (to be able to think about them on an abstract level)
|
|
||||||
- familiar with functional programming languages such as ML or Haskell
|
|
||||||
(GF itself is a language similar to these)
|
|
||||||
- on Master's or PhD level in linguistics, computer science, or mathematics
|
|
||||||
|
|
||||||
|
|
||||||
But it is the quality of the assignment that is assessed, not any formal
|
|
||||||
requirements. The "typical participant" was described to give an idea of
|
|
||||||
who is likely to succeed in this.
|
|
||||||
|
|
||||||
|
|
||||||
===Costs===
|
|
||||||
|
|
||||||
The summer school is free of charge.
|
|
||||||
|
|
||||||
Some travel grants are given, on the basis of a programming contest,
|
|
||||||
to cover travel and accommodation costs up to 1000 EUR
|
|
||||||
per person.
|
|
||||||
|
|
||||||
The number of grants will be decided during Spring 2009, and the grant
|
|
||||||
holders will be notified before the beginning of June.
|
|
||||||
|
|
||||||
Special terms will apply to students in
|
|
||||||
[GSLT http://www.gslt.hum.gu.se/] and
|
|
||||||
[NGSLT http://ngslt.org/].
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
===Teachers===
|
|
||||||
|
|
||||||
A list of teachers will be published here later. Some of the local teachers
|
|
||||||
probably involved are the following:
|
|
||||||
- Krasimir Angelov
|
|
||||||
- Robin Cooper
|
|
||||||
- Håkan Burden
|
|
||||||
- Markus Forsberg
|
|
||||||
- Harald Hammarström
|
|
||||||
- Peter Ljunglöf
|
|
||||||
- Aarne Ranta
|
|
||||||
|
|
||||||
|
|
||||||
More teachers are welcome! If you are interested, please contact us so that
|
|
||||||
we can discuss your involvement and travel arrangements.
|
|
||||||
|
|
||||||
In addition to teachers, we will look for consultants who can help to assess
|
|
||||||
the results for each language. Please contact us!
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
===The Summer School Committee===
|
|
||||||
|
|
||||||
This committee consists of a number of teachers and informants,
|
|
||||||
who will select the participants. It will be selected by April 2009.
|
|
||||||
|
|
||||||
|
|
||||||
===Time and Place===
|
|
||||||
|
|
||||||
The summer school will
|
|
||||||
be organized at the campus of Chalmers University of Technology in Gothenburg,
|
|
||||||
Sweden, on 17-28 August 2009.
|
|
||||||
|
|
||||||
Time schedule:
|
|
||||||
- February: announcement of summer school
|
|
||||||
- 20-29 April: on-line course
|
|
||||||
- 12 May: submission deadline for assignment work
|
|
||||||
- 31 May: review of assignments, notifications of acceptance
|
|
||||||
- 15 June: **registration deadline**
|
|
||||||
- 17-28 August: Summer School
|
|
||||||
- September-December: homework on resource grammars
|
|
||||||
- December: release of the extended Resource Grammar Library
|
|
||||||
|
|
||||||
|
|
||||||
===Dissemination and intellectual property===
|
|
||||||
|
|
||||||
The new resource grammars will be released under the LGPL just like
|
|
||||||
the current resource grammars,
|
|
||||||
with the copyright held by respective authors.
|
|
||||||
|
|
||||||
The grammars will be distributed via the GF web site.
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
==Why I should participate==
|
|
||||||
|
|
||||||
Seven reasons:
|
|
||||||
+ participation in a pioneering language technology work in an
|
|
||||||
enthusiastic atmosphere
|
|
||||||
+ work and fun with people from all over Europe and the world
|
|
||||||
+ job opportunities and business ideas
|
|
||||||
+ credits: the school project will be established as a course at Chalmers worth
|
|
||||||
7.5 or 15 ECTS points per person, depending on the work accomplished; also
|
|
||||||
extensions to Master's thesis will be considered (special credit arrangements
|
|
||||||
for [GSLT http://www.gslt.hum.gu.se/] and [NGSLT http://ngslt.org/])
|
|
||||||
+ merits: the resulting grammar can easily lead to a published paper (see below)
|
|
||||||
+ contribution to the multilingual and multicultural development of Europe and the
|
|
||||||
world
|
|
||||||
+ free trip and stay in Gothenburg (for travel grant students)
|
|
||||||
|
|
||||||
|
|
||||||
==More information==
|
|
||||||
|
|
||||||
[Course Google Group http://groups.google.com/group/gf-resource-school-2009/]
|
|
||||||
|
|
||||||
[GF web page http://digitalgrammars.com/gf/]
|
|
||||||
|
|
||||||
[GF tutorial http://digitalgrammars.com/gf/doc/gf-tutorial.html]
|
|
||||||
|
|
||||||
[GF resource synopsis http://digitalgrammars.com/gf/lib/resource/doc/synopsis.html]
|
|
||||||
|
|
||||||
[Resource-HOWTO document http://digitalgrammars.com/gf/doc/Resource-HOWTO.html]
|
|
||||||
|
|
||||||
|
|
||||||
===Contact===
|
|
||||||
|
|
||||||
Håkan Burden: burden at chalmers se
|
|
||||||
|
|
||||||
Aarne Ranta: aarne at chalmers se
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
===Selected publications from earlier resource grammar projects===
|
|
||||||
|
|
||||||
K. Angelov.
|
|
||||||
Type-Theoretical Bulgarian Grammar.
|
|
||||||
In B. Nordström and A. Ranta (eds),
|
|
||||||
//Advances in Natural Language Processing (GoTAL 2008)//,
|
|
||||||
LNCS/LNAI 5221, Springer,
|
|
||||||
2008.
|
|
||||||
|
|
||||||
B. Bringert.
|
|
||||||
//Programming Language Techniques for Natural Language Applications//.
|
|
||||||
PhD thesis, Computer Science, University of Gothenburg,
|
|
||||||
2008.
|
|
||||||
|
|
||||||
A. El Dada and A. Ranta.
|
|
||||||
Implementing an Open Source Arabic Resource Grammar in GF.
|
|
||||||
In M. Mughazy (ed),
|
|
||||||
//Perspectives on Arabic Linguistics XX. Papers from the Twentieth Annual Symposium on Arabic Linguistics, Kalamazoo, March 26//
|
|
||||||
John Benjamins Publishing Company.
|
|
||||||
2007.
|
|
||||||
|
|
||||||
A. El Dada.
|
|
||||||
Implementation of the Arabic Numerals and their Syntax in GF.
|
|
||||||
Computational Approaches to Semitic Languages: Common Issues and Resources,
|
|
||||||
ACL-2007 Workshop,
|
|
||||||
June 28, 2007, Prague.
|
|
||||||
2007.
|
|
||||||
|
|
||||||
H. Hammarström and A. Ranta.
|
|
||||||
Cardinal Numerals Revisited in GF.
|
|
||||||
//Workshop on Numerals in the World's Languages//.
|
|
||||||
Dept. of Linguistics, Max Planck Institute for Evolutionary Anthropology, Leipzig,
|
|
||||||
2004.
|
|
||||||
|
|
||||||
M. Humayoun, H. Hammarström, and A. Ranta.
|
|
||||||
Urdu Morphology, Orthography and Lexicon Extraction.
|
|
||||||
//CAASL-2: The Second Workshop on Computational Approaches to Arabic Script-based Languages//,
|
|
||||||
July 21-22, 2007, LSA 2007 Linguistic Institute, Stanford University.
|
|
||||||
2007.
|
|
||||||
|
|
||||||
K. Johannisson.
|
|
||||||
//Formal and Informal Software Specifications.//
|
|
||||||
PhD thesis, Computer Science, University of Gothenburg,
|
|
||||||
2005.
|
|
||||||
|
|
||||||
J. Khegai.
|
|
||||||
GF parallel resource grammars and Russian.
|
|
||||||
In proceedings of ACL2006
|
|
||||||
(The joint conference of the International Committee on Computational
|
|
||||||
Linguistics and the Association for Computational Linguistics) (pp. 475-482),
|
|
||||||
Sydney, Australia, July 2006.
|
|
||||||
|
|
||||||
J. Khegai.
|
|
||||||
//Language engineering in Grammatical Framework (GF)//.
|
|
||||||
PhD thesis, Computer Science, Chalmers University of Technology,
|
|
||||||
2006.
|
|
||||||
|
|
||||||
W. Ng'ang'a.
|
|
||||||
Multilingual content development for eLearning in Africa.
|
|
||||||
eLearning Africa: 1st Pan-African Conference on ICT for Development,
|
|
||||||
Education and Training. 24-26 May 2006, Addis Ababa, Ethiopia.
|
|
||||||
2006.
|
|
||||||
|
|
||||||
N. Perera and A. Ranta.
|
|
||||||
Dialogue System Localization with the GF Resource Grammar Library.
|
|
||||||
//SPEECHGRAM 2007: ACL Workshop on Grammar-Based Approaches to Spoken Language Processing//,
|
|
||||||
June 29, 2007, Prague.
|
|
||||||
2007.
|
|
||||||
|
|
||||||
A. Ranta.
|
|
||||||
Modular Grammar Engineering in GF.
|
|
||||||
//Research on Language and Computation//,
|
|
||||||
5:133-158, 2007.
|
|
||||||
|
|
||||||
A. Ranta.
|
|
||||||
How predictable is Finnish morphology? An experiment on lexicon construction.
|
|
||||||
In J. Nivre, M. Dahllöf and B. Megyesi (eds),
|
|
||||||
//Resourceful Language Technology: Festschrift in Honor of Anna Sågvall Hein//,
|
|
||||||
University of Uppsala,
|
|
||||||
2008.
|
|
||||||
|
|
||||||
A. Ranta. Grammars as Software Libraries.
|
|
||||||
To appear in
|
|
||||||
Y. Bertot, G. Huet, J-J. Lévy, and G. Plotkin (eds.),
|
|
||||||
//From Semantics to Computer Science//,
|
|
||||||
Cambridge University Press, Cambridge, 2009.
|
|
||||||
|
|
||||||
A. Ranta and K. Angelov.
|
|
||||||
Implementing Controlled Languages in GF.
|
|
||||||
To appear in the proceedings of //CNL 2009//.
|
|
||||||
|
|
||||||
@@ -1,73 +0,0 @@
|
|||||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
|
||||||
<HTML>
|
|
||||||
<HEAD>
|
|
||||||
<META NAME="generator" CONTENT="http://txt2tags.sf.net">
|
|
||||||
<TITLE>GF 3.0</TITLE>
|
|
||||||
</HEAD><BODY BGCOLOR="white" TEXT="black">
|
|
||||||
<P ALIGN="center"><CENTER><H1>GF 3.0</H1>
|
|
||||||
<FONT SIZE="4">
|
|
||||||
<I>Krasimir Angelov, Björn Bringert, and Aarne Ranta</I><BR>
|
|
||||||
Beta release, 27 June 2008
|
|
||||||
</FONT></CENTER>
|
|
||||||
|
|
||||||
<P>
|
|
||||||
GF Version 3.0 is a major revision of GF. The source language is a superset of the
|
|
||||||
language in 2.9, which means backward compatibility. But the target languages, the
|
|
||||||
compiler implementation, and the functionalities (e.g. the shell) have undergone
|
|
||||||
radical changes.
|
|
||||||
</P>
|
|
||||||
<H2>New features</H2>
|
|
||||||
<P>
|
|
||||||
Here is a summary of the main novelties visible to the user:
|
|
||||||
</P>
|
|
||||||
<UL>
|
|
||||||
<LI><B>Size</B>: the source code and the executable binary size have gone
|
|
||||||
down to about half of what they were in 2.9.
|
|
||||||
<LI><B>Portability</B>: the new back end format PGF (Portable Grammar Format) is
|
|
||||||
much simpler than the old GFC format, and therefore easier to port to new
|
|
||||||
platforms.
|
|
||||||
<LI><B>Multilingual web page support</B>: as an example of portability, GF 3.0 provides a
|
|
||||||
compiler from PGF to JavaScript. There are also JavaScript libraries for creating
|
|
||||||
translators and syntax editors as client-side web applications.
|
|
||||||
<LI><B>Incremental parsing</B>: there is a possibility of word completion when
|
|
||||||
input strings are sent to the parser.
|
|
||||||
<LI><B>Application programmer's interfaces</B>: both source-GF and PGF formats,
|
|
||||||
the shell, and the compiler are accessible via high-level APIs.
|
|
||||||
<LI><B>Resource library version 1.4</B>: more coverage, more languages; some of
|
|
||||||
the new GF language features are exploited.
|
|
||||||
<LI><B>Uniform character encoding</B>: UTF8 in generated files, user-definable in
|
|
||||||
source files
|
|
||||||
</UL>
|
|
||||||
|
|
||||||
<H2>Non-supported features</H2>
|
|
||||||
<P>
|
|
||||||
There are some features of GF 2.9 that will <I>not</I> work in the 3.0 beta release.
|
|
||||||
</P>
|
|
||||||
<UL>
|
|
||||||
<LI>Java Editor GUI: we now see the JavaScript editor as the main form of
|
|
||||||
syntax editing.
|
|
||||||
<LI>Pre-module multi-file grammar format: the grammar format of GF before version 2.0
|
|
||||||
is not yet supported.
|
|
||||||
<LI>Context-free and EBNF input grammar formats.
|
|
||||||
<LI>Probabilistic GF grammars.
|
|
||||||
<LI>Some output formats: LBNF.
|
|
||||||
<LI>Some GF shell commands: while the main ones will be supported with their familiar
|
|
||||||
syntax and options, some old commands have not been included. The GF shell
|
|
||||||
command <CODE>help -changes</CODE> gives the actual list.
|
|
||||||
</UL>
|
|
||||||
|
|
||||||
<P>
|
|
||||||
Users who want to have these features are welcome to contact us,
|
|
||||||
and even more welcome to contribute code that restores them!
|
|
||||||
</P>
|
|
||||||
<H2>GF language extensions</H2>
|
|
||||||
<P>
|
|
||||||
Operations for defining patterns.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
Inheritance of overload groups.
|
|
||||||
</P>
|
|
||||||
|
|
||||||
<!-- html code generated by txt2tags 2.4 (http://txt2tags.sf.net) -->
|
|
||||||
<!-- cmdline: txt2tags -thtml doc/gf3-release.txt -->
|
|
||||||
</BODY></HTML>
|
|
||||||
@@ -1,58 +0,0 @@
|
|||||||
GF 3.0
|
|
||||||
Krasimir Angelov, Björn Bringert, and Aarne Ranta
|
|
||||||
Beta release, 27 June 2008
|
|
||||||
|
|
||||||
|
|
||||||
GF Version 3.0 is a major revision of GF. The source language is a superset of the
|
|
||||||
language in 2.9, which means backward compatibility. But the target languages, the
|
|
||||||
compiler implementation, and the functionalities (e.g. the shell) have undergone
|
|
||||||
radical changes.
|
|
||||||
|
|
||||||
|
|
||||||
==New features==
|
|
||||||
|
|
||||||
Here is a summary of the main novelties visible to the user:
|
|
||||||
- **Size**: the source code and the executable binary size have gone
|
|
||||||
down to about half of what they were in 2.9.
|
|
||||||
- **Portability**: the new back end format PGF (Portable Grammar Format) is
|
|
||||||
much simpler than the old GFC format, and therefore easier to port to new
|
|
||||||
platforms.
|
|
||||||
- **Multilingual web page support**: as an example of portability, GF 3.0 provides a
|
|
||||||
compiler from PGF to JavaScript. There are also JavaScript libraries for creating
|
|
||||||
translators and syntax editors as client-side web applications.
|
|
||||||
- **Incremental parsing**: there is a possibility of word completion when
|
|
||||||
input strings are sent to the parser.
|
|
||||||
- **Application programmer's interfaces**: both source-GF and PGF formats,
|
|
||||||
the shell, and the compiler are accessible via high-level APIs.
|
|
||||||
- **Resource library version 1.4**: more coverage, more languages; some of
|
|
||||||
the new GF language features are exploited.
|
|
||||||
- **Uniform character encoding**: UTF8 in generated files, user-definable in
|
|
||||||
source files
|
|
||||||
|
|
||||||
|
|
||||||
==Non-supported features==
|
|
||||||
|
|
||||||
There are some features of GF 2.9 that will //not// work in the 3.0 beta release.
|
|
||||||
- Java Editor GUI: we now see the JavaScript editor as the main form of
|
|
||||||
syntax editing.
|
|
||||||
- Pre-module multi-file grammar format: the grammar format of GF before version 2.0
|
|
||||||
is not yet supported.
|
|
||||||
- Context-free and EBNF input grammar formats.
|
|
||||||
- Probabilistic GF grammars.
|
|
||||||
- Some output formats: LBNF.
|
|
||||||
- Some GF shell commands: while the main ones will be supported with their familiar
|
|
||||||
syntax and options, some old commands have not been included. The GF shell
|
|
||||||
command ``help -changes`` gives the actual list.
|
|
||||||
|
|
||||||
|
|
||||||
Users who want to have these features are welcome to contact us,
|
|
||||||
and even more welcome to contribute code that restores them!
|
|
||||||
|
|
||||||
|
|
||||||
==GF language extensions==
|
|
||||||
|
|
||||||
Operations for defining patterns.
|
|
||||||
|
|
||||||
Inheritance of overload groups.
|
|
||||||
|
|
||||||
|
|
||||||
@@ -1,106 +0,0 @@
|
|||||||
graph{
|
|
||||||
|
|
||||||
size = "8,8" ;
|
|
||||||
|
|
||||||
overlap = scale ;
|
|
||||||
|
|
||||||
"Abs" [label = "Abstract Syntax", style = "solid", shape = "rectangle"] ;
|
|
||||||
|
|
||||||
"1" [label = "Bulgarian", style = "solid", shape = "ellipse", color = "green"] ;
|
|
||||||
"1" -- "Abs" [style = "solid"];
|
|
||||||
|
|
||||||
"2" [label = "Czech", style = "solid", shape = "ellipse", color = "red"] ;
|
|
||||||
"2" -- "Abs" [style = "solid"];
|
|
||||||
|
|
||||||
"3" [label = "Danish", style = "solid", shape = "ellipse", color = "green"] ;
|
|
||||||
"3" -- "Abs" [style = "solid"];
|
|
||||||
|
|
||||||
"4" [label = "German", style = "solid", shape = "ellipse", color = "green"] ;
|
|
||||||
"4" -- "Abs" [style = "solid"];
|
|
||||||
|
|
||||||
"5" [label = "Estonian", style = "solid", shape = "ellipse", color = "red"] ;
|
|
||||||
"5" -- "Abs" [style = "solid"];
|
|
||||||
|
|
||||||
"6" [label = "Greek", style = "solid", shape = "ellipse", color = "red"] ;
|
|
||||||
"6" -- "Abs" [style = "solid"];
|
|
||||||
|
|
||||||
"7" [label = "English", style = "solid", shape = "ellipse", color = "green"] ;
|
|
||||||
"7" -- "Abs" [style = "solid"];
|
|
||||||
|
|
||||||
"8" [label = "Spanish", style = "solid", shape = "ellipse", color = "green"] ;
|
|
||||||
"8" -- "Abs" [style = "solid"];
|
|
||||||
|
|
||||||
"9" [label = "French", style = "solid", shape = "ellipse", color = "green"] ;
|
|
||||||
"9" -- "Abs" [style = "solid"];
|
|
||||||
|
|
||||||
"10" [label = "Italian", style = "solid", shape = "ellipse", color = "green"] ;
|
|
||||||
"10" -- "Abs" [style = "solid"];
|
|
||||||
|
|
||||||
"11" [label = "Latvian", style = "solid", shape = "ellipse", color = "red"] ;
|
|
||||||
"11" -- "Abs" [style = "solid"];
|
|
||||||
|
|
||||||
"12" [label = "Lithuanian", style = "solid", shape = "ellipse", color = "red"] ;
|
|
||||||
"Abs" -- "12" [style = "solid"];
|
|
||||||
|
|
||||||
"13" [label = "Irish", style = "solid", shape = "ellipse", color = "red"] ;
|
|
||||||
"Abs" -- "13" [style = "solid"];
|
|
||||||
|
|
||||||
"14" [label = "Hungarian", style = "solid", shape = "ellipse", color = "red"] ;
|
|
||||||
"Abs" -- "14" [style = "solid"];
|
|
||||||
|
|
||||||
"15" [label = "Maltese", style = "solid", shape = "ellipse", color = "red"] ;
|
|
||||||
"Abs" -- "15" [style = "solid"];
|
|
||||||
|
|
||||||
"16" [label = "Dutch", style = "solid", shape = "ellipse", color = "red"] ;
|
|
||||||
"Abs" -- "16" [style = "solid"];
|
|
||||||
|
|
||||||
"17" [label = "Polish", style = "solid", shape = "ellipse", color = "orange"] ;
|
|
||||||
"Abs" -- "17" [style = "solid"];
|
|
||||||
|
|
||||||
"18" [label = "Portuguese", style = "solid", shape = "ellipse", color = "red"] ;
|
|
||||||
"Abs" -- "18" [style = "solid"];
|
|
||||||
|
|
||||||
"19" [label = "Slovak", style = "solid", shape = "ellipse", color = "red"] ;
|
|
||||||
"Abs" -- "19" [style = "solid"];
|
|
||||||
|
|
||||||
"20" [label = "Slovene", style = "solid", shape = "ellipse", color = "red"] ;
|
|
||||||
"Abs" -- "20" [style = "solid"];
|
|
||||||
|
|
||||||
"21" [label = "Romanian", style = "solid", shape = "ellipse", color = "orange"] ;
|
|
||||||
"Abs" -- "21" [style = "solid"];
|
|
||||||
|
|
||||||
"22" [label = "Finnish", style = "solid", shape = "ellipse", color = "green"] ;
|
|
||||||
"Abs" -- "22" [style = "solid"];
|
|
||||||
|
|
||||||
"23" [label = "Swedish", style = "solid", shape = "ellipse", color = "green"] ;
|
|
||||||
"Abs" -- "23" [style = "solid"];
|
|
||||||
|
|
||||||
"24" [label = "Catalan", style = "dotted", shape = "ellipse", color = "green"] ;
|
|
||||||
"Abs" -- "24" [style = "solid"];
|
|
||||||
|
|
||||||
"25" [label = "Norwegian", style = "dotted", shape = "ellipse", color = "green"] ;
|
|
||||||
"Abs" -- "25" [style = "solid"];
|
|
||||||
|
|
||||||
"26" [label = "Russian", style = "dotted", shape = "ellipse", color = "green"] ;
|
|
||||||
"Abs" -- "26" [style = "solid"];
|
|
||||||
|
|
||||||
"27" [label = "Interlingua", style = "dotted", shape = "ellipse", color = "green"] ;
|
|
||||||
"Abs" -- "27" [style = "solid"];
|
|
||||||
|
|
||||||
"28" [label = "Latin", style = "dotted", shape = "ellipse", color = "orange"] ;
|
|
||||||
"Abs" -- "28" [style = "solid"];
|
|
||||||
"29" [label = "Turkish", style = "dotted", shape = "ellipse", color = "orange"] ;
|
|
||||||
"Abs" -- "29" [style = "solid"];
|
|
||||||
"30" [label = "Hindi", style = "dotted", shape = "ellipse", color = "orange"] ;
|
|
||||||
"Abs" -- "30" [style = "solid"];
|
|
||||||
"31" [label = "Thai", style = "dotted", shape = "ellipse", color = "orange"] ;
|
|
||||||
"Abs" -- "31" [style = "solid"];
|
|
||||||
"32" [label = "Urdu", style = "dotted", shape = "ellipse", color = "orange"] ;
|
|
||||||
"Abs" -- "32" [style = "solid"];
|
|
||||||
"33" [label = "Telugu", style = "dotted", shape = "ellipse", color = "red"] ;
|
|
||||||
"Abs" -- "33" [style = "solid"];
|
|
||||||
"34" [label = "Arabic", style = "dotted", shape = "ellipse", color = "orange"] ;
|
|
||||||
"Abs" -- "34" [style = "solid"];
|
|
||||||
|
|
||||||
|
|
||||||
}
|
|
||||||
|
Before Width: | Height: | Size: 129 KiB |
|
Before Width: | Height: | Size: 439 KiB |
|
Before Width: | Height: | Size: 1.8 MiB |
@@ -1,46 +0,0 @@
|
|||||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
|
||||||
<HTML>
|
|
||||||
<HEAD>
|
|
||||||
<META NAME="generator" CONTENT="http://txt2tags.sf.net">
|
|
||||||
<TITLE>Library-Based Grammar Engineering</TITLE>
|
|
||||||
</HEAD><BODY BGCOLOR="white" TEXT="black">
|
|
||||||
<P ALIGN="center"><CENTER><H1>Library-Based Grammar Engineering</H1>
|
|
||||||
<FONT SIZE="4">
|
|
||||||
<I>VR Project 2006-2008</I><BR>
|
|
||||||
</FONT></CENTER>
|
|
||||||
|
|
||||||
<H1>Staff</H1>
|
|
||||||
<P>
|
|
||||||
Lars Borin (co-leader)
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
Robin Cooper (co-leader)
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
Aarne Ranta (project responsible)
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
Sibylle Schupp (co-leader)
|
|
||||||
</P>
|
|
||||||
<H1>Publications</H1>
|
|
||||||
<P>
|
|
||||||
Ali El Dada, MSc Thesis
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
Muhammad Humayoun, MSc Thesis
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
Janna Khegai,
|
|
||||||
Language Engineering in GF, PhD Thesis, Chalmers. 2006.
|
|
||||||
</P>
|
|
||||||
<H1>Links</H1>
|
|
||||||
<P>
|
|
||||||
<A HREF="http://www.cs.chalmers.se/~aarne/GF/">GF</A>
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
<A HREF="http://www.cs.chalmers.se/~markus/FM/">Functional Morphology</A>
|
|
||||||
</P>
|
|
||||||
|
|
||||||
<!-- html code generated by txt2tags 2.0 (http://txt2tags.sf.net) -->
|
|
||||||
<!-- cmdline: txt2tags -thtml vr.txt -->
|
|
||||||
</BODY></HTML>
|
|
||||||
@@ -1,32 +0,0 @@
|
|||||||
Library-Based Grammar Engineering
|
|
||||||
VR Project 2006-2008
|
|
||||||
|
|
||||||
|
|
||||||
=Staff=
|
|
||||||
|
|
||||||
Lars Borin (co-leader)
|
|
||||||
|
|
||||||
Robin Cooper (co-leader)
|
|
||||||
|
|
||||||
Aarne Ranta (project responsible)
|
|
||||||
|
|
||||||
Sibylle Schupp (co-leader)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
=Publications=
|
|
||||||
|
|
||||||
Ali El Dada, MSc Thesis
|
|
||||||
|
|
||||||
Muhammad Humayoun, MSc Thesis
|
|
||||||
|
|
||||||
Janna Khegai,
|
|
||||||
Language Engineering in GF, PhD Thesis, Chalmers. 2006.
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
=Links=
|
|
||||||
|
|
||||||
[GF http://www.cs.chalmers.se/~aarne/GF/]
|
|
||||||
|
|
||||||
[Functional Morphology http://www.cs.chalmers.se/~markus/FM/]
|
|
||||||
@@ -1,136 +0,0 @@
|
|||||||
module Main where
|
|
||||||
|
|
||||||
import PGF.Editor
|
|
||||||
import PGF
|
|
||||||
|
|
||||||
import Data.Char
|
|
||||||
import System (getArgs)
|
|
||||||
|
|
||||||
-- a rough editor shell using the PGF.Editor API
|
|
||||||
-- compile:
|
|
||||||
-- cd .. ; ghc --make exper/EditShell.hs
|
|
||||||
-- use:
|
|
||||||
-- EditShell file.pgf
|
|
||||||
|
|
||||||
-- | Entry point of the editor shell.
--
-- Prints a greeting, loads the PGF grammar named by the first command-line
-- argument, builds the refinement dictionary and an initial editor state for
-- the grammar's start category, and enters 'editLoop'.
--
-- Fails with a usage message (as an 'IOError') when no argument is given;
-- previously this surfaced as an opaque pattern-match failure.
main = do
  putStrLn "Hi, I'm the Editor! Type h for help on commands."
  args <- getArgs
  case args of
    [] -> ioError (userError "usage: EditShell file.pgf")
    file:_ -> do
      pgf <- readPGF file
      let dict = pgf2dict pgf
      let st0  = new (startCat pgf)
      -- Only evaluated if a loop actually uses the language (lazy binding).
      let lang = head (languages pgf) ---- for printnames; enable choosing lang
      editLoop pgf dict lang st0         -- alt 1: all editing commands
--      dialogueLoop pgf dict lang st0  -- alt 2: just refinement by parsing (see bottom)
|
|
||||||
|
|
||||||
-- | The interactive editing loop: show a prompt describing the current
-- state, read one command line, apply it with 'interpret', and repeat
-- with the resulting state. The loop never terminates by itself.
editLoop :: PGF -> Dict -> Language -> State -> IO State
editLoop pgf dict lang st = do
  putStrLn prompt
  c <- getLine
  st' <- interpret pgf dict st c
  editLoop pgf dict lang st'
 where
  -- What to tell the user before reading the next command.
  prompt
    | null (allMetas st) =
        -- No metavariables left: show the tree and all its linearizations.
        unlines ("The tree is complete:" : prState st : linearizeAll pgf (stateTree st))
    | isMetaFocus st =
        -- Focus is on a metavariable: show its type and the menu index range.
        "I want something of type " ++ showType (focusType st) ++
        " (0 - " ++ show (length (refineMenu dict st)-1) ++ ")"
    | otherwise =
        "Do you want to change this node?"
|
|
||||||
|
|
||||||
-- | Execute one editor command line and return the resulting state.
--
-- The command is selected by the first word of the line (see 'commandHelp'):
--
-- * @r f@     refine the focus with function @f@
-- * @p text@  parse @text@ in the focus type and refine with the result
-- * @a@       refine with a randomly generated tree
-- * @d@       delete the current subtree
-- * @m@       print the refinement menu
-- * @N@       refine with item number @N@ of the refinement menu
-- * @[..]@    move the focus to the given position
-- * @>@       move the focus to the next node
-- * @x@       list all metavariable positions with their types
-- * @h@       print the help text
--
-- Commands that change the state print it via 'prLState'. Invalid input
-- (unknown command, menu index out of range, malformed position, no random
-- tree available) prints a message and leaves the state unchanged instead
-- of aborting the shell.
interpret :: PGF -> Dict -> State -> String -> IO State
interpret pgf dict st c = case words c of
  "r":f:_ -> update (goNextMeta (refine dict (mkCId f) st))
  "p":_ -> do
    -- Skip leading blanks before dropping the command word; otherwise a
    -- line such as " p foo" would be parsed in full, including the "p".
    let input = dropWhile (not . isSpace) (dropWhile isSpace c)
    let tts   = parseAll pgf (focusType st) input
    st' <- selectReplace dict (concat tts) st
    update st'
  "a":_ -> do
    ts <- generateRandom pgf (focusType st)
    case ts of
      t:_ -> update (goNextMeta (replace dict t st))
      []  -> putStrLn "no results" >> return st
  "d":_ -> update (delete st)
  "m":_ -> do
    putStrLn (unwords (map prCId (refineMenu dict st)))
    return st
  d:_ | all isDigit d -> do
    let menu = refineMenu dict st
    -- Read as Integer so that very long digit strings cannot wrap around
    -- into a bogus Int index.
    let i = read d :: Integer
    if i < fromIntegral (length menu)
      then update (goNextMeta (refine dict (menu !! fromInteger i) st))
      else putStrLn "no such menu item" >> return st
  p@('[':_):_ -> case reads p of
    -- Accept only a complete, well-formed position literal.
    [(pos,rest)] | all isSpace rest -> update (goPosition (mkPosition pos) st)
    _ -> misunderstood
  ">":_ -> update (goNext st)
  "x":_ -> do
    mapM_ putStrLn [show (showPosition p) ++ showType t | (p,t) <- allMetas st]
    return st
  "h":_ -> putStrLn commandHelp >> return st
  _ -> misunderstood
 where
  -- Print the new state and hand it back to the loop.
  update st' = prLState pgf st' >> return st'
  -- Report an unusable command, keeping the current state.
  misunderstood = putStrLn "command not understood" >> return st
|
|
||||||
|
|
||||||
-- | Print the editor state followed by every linearization of its tree.
prLState pgf st = putStrLn (unlines (header ++ linearizeAll pgf (stateTree st)))
 where
  -- Fixed heading, an empty line, then the printed state itself.
  header = ["Now I have:", "", prState st]
|
|
||||||
|
|
||||||
-- | Replace the focus with one tree out of a list of candidates, such as
-- the results of an ambiguous parse.
--
-- * No candidates: print @no results@ and return the state unchanged.
-- * One candidate: use it directly.
-- * Several candidates: list them with their (zero-based) numbers and read
--   the user's choice from standard input. A choice that is not a number
--   in range prints a message and leaves the state unchanged; previously
--   such input crashed the shell.
--
-- After a replacement the focus moves on with 'goNextMeta'.
selectReplace :: Dict -> [Tree] -> State -> IO State
selectReplace dict ts st = case ts of
  []  -> putStrLn "no results" >> return st
  [t] -> return (use t)
  _   -> do
    mapM_ putStrLn $ "choose tree by entering its number:" :
      [show i ++ " : " ++ showTree t | (i,t) <- zip [0..] ts]
    d <- getLine
    case reads d :: [(Int,String)] of
      [(i,rest)] | all isSpace rest && i >= 0 && i < length ts
        -> return (use (ts !! i))
      _ -> putStrLn "invalid choice" >> return st
 where
  -- Put the chosen tree at the focus and advance to the next metavariable.
  use t = goNextMeta (replace dict t st)
|
|
||||||
|
|
||||||
-- | Help text printed by the @h@ command: one line per editor command.
-- The numeric command indexes the refinement menu from 0, so the text says so
-- explicitly instead of calling item 4 the "4th".
commandHelp :: String
commandHelp = unlines [
  "a -- refine with a random subtree",
  "d -- delete current subtree",
  "h -- display this help message",
  "m -- show refinement menu",
  "p Anything -- parse Anything and refine with it",
  "r Function -- refine with Function",
  "x -- show all unknown positions and their types",
  "4 -- refine with item number 4 from menu, counting from 0 (see m)",
  "[1,2,3] -- go to position 1,2,3",
  "> -- go to next node"
  ]
|
|
||||||
|
|
||||||
----------------
|
|
||||||
-- for a dialogue system, working just by parsing; questions are cat printnames
|
|
||||||
----------------
|
|
||||||
|
|
||||||
-- | Dialogue loop: show a prompt that depends on the state, read one line,
-- let 'interpretD' parse it into the focus, and continue with the new state.
-- The function recurses unconditionally, so it never returns by itself.
dialogueLoop :: PGF -> Dict -> Language -> State -> IO State
dialogueLoop pgf dict lang st = do
  putStrLn prompt
  answer <- getLine
  interpretD pgf dict st answer >>= dialogueLoop pgf dict lang
 where
  prompt
    -- nothing left to fill in: show the finished result
    | null (allMetas st) = "Ready!\n " ++ unlines (linearizeAll pgf (stateTree st))
    -- focus is on a metavariable: ask with the printname of its type
    | isMetaFocus st     = showPrintName pgf lang (focusType st)
    | otherwise          = "Do you want to change this node?"
|
|
||||||
|
|
||||||
-- | Parse the user's line in the type of the focus and refine the focus with
-- the result (asking the user to choose if there are several parses).
-- The new state is returned without being printed.
interpretD :: PGF -> Dict -> State -> String -> IO State
interpretD pgf dict st c =
  selectReplace dict (concat (parseAll pgf (focusType st) c)) st
|
|
||||||
@@ -1,461 +0,0 @@
|
|||||||
----------------------------------------------------------------------
|
|
||||||
-- |
|
|
||||||
-- Module : Evaluate
|
|
||||||
-- Maintainer : AR
|
|
||||||
-- Stability : (stable)
|
|
||||||
-- Portability : (portable)
|
|
||||||
--
|
|
||||||
-- > CVS $Date: 2005/11/01 15:39:12 $
|
|
||||||
-- > CVS $Author: aarne $
|
|
||||||
-- > CVS $Revision: 1.19 $
|
|
||||||
--
|
|
||||||
-- Computation of source terms. Used in compilation and in @cc@ command.
|
|
||||||
-----------------------------------------------------------------------------
|
|
||||||
|
|
||||||
module GF.Compile.Evaluate (appEvalConcrete) where
|
|
||||||
|
|
||||||
import GF.Data.Operations
|
|
||||||
import GF.Grammar.Grammar
|
|
||||||
import GF.Infra.Ident
|
|
||||||
import GF.Data.Str
|
|
||||||
import GF.Grammar.PrGrammar
|
|
||||||
import GF.Infra.Modules
|
|
||||||
import GF.Infra.Option
|
|
||||||
import GF.Grammar.Macros
|
|
||||||
import GF.Grammar.Lookup
|
|
||||||
import GF.Grammar.Refresh
|
|
||||||
import GF.Grammar.PatternMatch
|
|
||||||
import GF.Grammar.Lockfield (isLockLabel) ----
|
|
||||||
|
|
||||||
import GF.Grammar.AppPredefined
|
|
||||||
|
|
||||||
import qualified Data.Map as Map
|
|
||||||
|
|
||||||
import Data.List (nub,intersperse)
|
|
||||||
import Control.Monad (liftM2, liftM)
|
|
||||||
import Debug.Trace
|
|
||||||
|
|
||||||
|
|
||||||
-- | Evaluation environment threaded through the 'STM' computations of this module.
data EEnv = EEnv {
|
|
||||||
-- | cache of definitions already looked up, keyed by an identifier pair (see 'lookupComputed')
computd :: Map.Map (Ident,Ident) FTerm,
|
|
||||||
-- | counter from which 'getTemp' builds fresh variable names
temp :: Int
|
|
||||||
}
|
|
||||||
|
|
||||||
-- | Initial environment: empty cache, name counter at 0.
emptyEEnv = EEnv Map.empty 0
|
|
||||||
|
|
||||||
-- | Look up a cached definition in the current environment.
lookupComputed :: (Ident,Ident) -> STM EEnv (Maybe FTerm)
lookupComputed mc = liftM (Map.lookup mc . computd) readSTM
|
|
||||||
|
|
||||||
-- | Store (or overwrite) the cached definition for the given key.
updateComputed :: (Ident,Ident) -> FTerm -> STM EEnv ()
updateComputed mc t = updateSTM remember
 where
  remember env = env { computd = Map.insert mc t (computd env) }
|
|
||||||
|
|
||||||
-- | Produce a fresh identifier of the form @#n@ and advance the counter.
-- The name is built from the counter value before the increment.
getTemp :: STM EEnv Ident
getTemp = do
  n <- liftM temp readSTM
  updateSTM (\e -> e { temp = temp e + 1 })
  return (identC ('#' : show n))
|
|
||||||
|
|
||||||
-- | A term in "functional" form: either a plain term ('FTC') or a Haskell
-- function awaiting one more argument term ('FTF').
data FTerm =
|
|
||||||
FTC Term
|
|
||||||
| FTF (Term -> FTerm)
|
|
||||||
|
|
||||||
-- | Print a functional term. Each function layer is shown as @i ->@ and is
-- applied to the integer literal @i@ so that its body can be printed.
prFTerm :: Integer -> FTerm -> String
prFTerm _ (FTC t) = prt t
prFTerm i (FTF f) = show i +++ "->" +++ prFTerm (i + 1) (f (EInt i))
|
|
||||||
|
|
||||||
-- | Turn leading lambda abstractions into Haskell functions that substitute
-- their argument into the body; any other term becomes a constant.
term2fterm (Abs x b) = FTF (\arg -> term2fterm (subst [(x,arg)] b))
term2fterm t         = FTC t
|
|
||||||
|
|
||||||
-- | Debugging hook: currently the identity on @ft@; the trace call is commented out.
traceFTerm c ft = ft ----trace ("\n" ++ prt c +++ "=" +++ take 60 (prFTerm 0 ft)) ft
|
|
||||||
|
|
||||||
-- | Convert a functional term back to an ordinary term: every function layer
-- is applied to a fresh variable (from 'getTemp') and re-abstracted over it.
fterm2term :: FTerm -> STM EEnv Term
fterm2term (FTC t) = return t
fterm2term (FTF f) = do
  v    <- getTemp
  body <- fterm2term (f (Vr v))
  return (Abs v body)
|
|
||||||
|
|
||||||
-- | Replace variables by their values in the association list @g@; variables
-- without an entry are kept. Other terms are traversed with 'composSafeOp'.
subst g t@(Vr x) = case lookup x g of
  Just value -> value
  Nothing    -> t
subst g t = composSafeOp (subst g) t
|
|
||||||
|
|
||||||
|
|
||||||
-- | Apply a functional term to arguments, one function layer per argument.
-- Application stops when either the arguments or the function layers run out;
-- arguments left over for a constant ('FTC') are dropped.
appFTerm :: FTerm -> [Term] -> FTerm
appFTerm (FTF f) (x:xs) = appFTerm (f x) xs
appFTerm ft      _      = ft
|
|
||||||
|
|
||||||
-- | Split a left-nested application @f a1 ... an@ into its head @f@ and the
-- argument list @[a1,...,an]@; a non-application has no arguments.
apps :: Term -> (Term,[Term])
apps = go []
 where
  -- walk down the function side, collecting arguments in front of the accumulator
  go acc (App f a) = go (a : acc) f
  go acc t         = (t, acc)
|
|
||||||
|
|
||||||
-- | Run 'evalConcrete' from the empty environment and keep only its result (the final environment is discarded).
appEvalConcrete gr bt = liftM fst $ appSTM (evalConcrete gr bt) emptyEEnv
|
|
||||||
|
|
||||||
-- | Evaluate the linearization definition of every typed function judgement
-- ('CncFun') in a tree of judgements; all other judgements are returned as
-- they are. The printname of a function is left untouched here.
evalConcrete :: SourceGrammar -> BinTree Ident Info -> STM EEnv (BinTree Ident Info)
evalConcrete gr mo = mapMTree evaldef mo where

  evaldef (f,info) = case info of
    CncFun mt@(Just (_,ty@(_,_))) pde ppr ->
      evalIn ("\nerror in linearization of function" +++ prt f +++ ":") $ do
        pde' <- evalDefinition ty pde
        return (f, CncFun mt pde' ppr) -- only cat in type actually needed
    _ -> return (f,info)

  -- evaluate a definition that is actually given; pass anything else through
  evalDefinition ty (Yes de) = liftM yes (pEval ty de)
  evalDefinition _  pde      = return pde
|
|
||||||
|
|
||||||
-- Partially evaluate a linearization term: apply it to its context variables,
-- record-expand the result against the value type, compute it with every
-- variable bound to itself, and abstract over the variables again.
pEval (context,val) trm = do
  expanded <- recordExpand val (mkApp trm (map Vr vars))
  body     <- comp [(v, Vr v) | v <- vars] expanded
  return (mkAbs vars body)
 where
  vars = map fst context
|
|
||||||
|
|
||||||
-- Eta-expand a term of record type into an explicit record of projections.
-- Free variation is expanded variant by variant; non-record types are left alone.
recordExpand typ trm = return $ case unComputed typ of
  RecType tys -> case trm of
    FV rs -> FV (map (expand tys) rs)
    _     -> expand tys trm
  _ -> trm
 where
  expand tys r = R [assign lab (P r lab) | (lab,_) <- tys]
|
|
||||||
|
|
||||||
comp g t = case t of
|
|
||||||
|
|
||||||
Q (IC "Predef") _ -> trace ("\nPredef:\n" ++ prt t) $ return t
|
|
||||||
|
|
||||||
Q p c -> do
|
|
||||||
md <- lookupComputed (p,c)
|
|
||||||
case md of
|
|
||||||
Nothing -> do
|
|
||||||
d <- lookRes (p,c)
|
|
||||||
updateComputed (p,c) $ traceFTerm c $ term2fterm d
|
|
||||||
return d
|
|
||||||
Just d -> fterm2term d >>= comp g
|
|
||||||
App f a -> case apps t of
|
|
||||||
(h@(Q p c),xs) | p == IC "Predef" -> do
|
|
||||||
xs' <- mapM (comp g) xs
|
|
||||||
(t',b) <- stmErr $ appPredefined (foldl App h xs')
|
|
||||||
if b then return t' else comp g t'
|
|
||||||
(h@(Q p c),xs) -> do
|
|
||||||
xs' <- mapM (comp g) xs
|
|
||||||
md <- lookupComputed (p,c)
|
|
||||||
case md of
|
|
||||||
Just ft -> do
|
|
||||||
t <- fterm2term $ appFTerm ft xs'
|
|
||||||
comp g t
|
|
||||||
Nothing -> do
|
|
||||||
d <- lookRes (p,c)
|
|
||||||
let ft = traceFTerm c $ term2fterm d
|
|
||||||
updateComputed (p,c) ft
|
|
||||||
t' <- fterm2term $ appFTerm ft xs'
|
|
||||||
comp g t'
|
|
||||||
_ -> do
|
|
||||||
f' <- comp g f
|
|
||||||
a' <- comp g a
|
|
||||||
case (f',a') of
|
|
||||||
(Abs x b,_) -> comp (ext x a' g) b
|
|
||||||
(QC _ _,_) -> returnC $ App f' a'
|
|
||||||
(FV fs, _) -> mapM (\c -> comp g (App c a')) fs >>= return . variants
|
|
||||||
(_, FV as) -> mapM (\c -> comp g (App f' c)) as >>= return . variants
|
|
||||||
|
|
||||||
(Alias _ _ d, _) -> comp g (App d a')
|
|
||||||
|
|
||||||
(S (T i cs) e,_) -> prawitz g i (flip App a') cs e
|
|
||||||
|
|
||||||
_ -> do
|
|
||||||
(t',b) <- stmErr $ appPredefined (App f' a')
|
|
||||||
if b then return t' else comp g t'
|
|
||||||
|
|
||||||
|
|
||||||
Vr x -> do
|
|
||||||
t' <- maybe (prtRaise (
|
|
||||||
"context" +++ show g +++ ": no value given to variable") x) return $ lookup x g
|
|
||||||
case t' of
|
|
||||||
_ | t == t' -> return t
|
|
||||||
_ -> comp g t'
|
|
||||||
|
|
||||||
Abs x b -> do
|
|
||||||
b' <- comp (ext x (Vr x) g) b
|
|
||||||
return $ Abs x b'
|
|
||||||
|
|
||||||
Let (x,(_,a)) b -> do
|
|
||||||
a' <- comp g a
|
|
||||||
comp (ext x a' g) b
|
|
||||||
|
|
||||||
Prod x a b -> do
|
|
||||||
a' <- comp g a
|
|
||||||
b' <- comp (ext x (Vr x) g) b
|
|
||||||
return $ Prod x a' b'
|
|
||||||
|
|
||||||
P t l | isLockLabel l -> return $ R []
|
|
||||||
---- a workaround 18/2/2005: take this away and find the reason
|
|
||||||
---- why earlier compilation destroys the lock field
|
|
||||||
|
|
||||||
|
|
||||||
P t l -> do
|
|
||||||
t' <- comp g t
|
|
||||||
case t' of
|
|
||||||
FV rs -> mapM (\c -> comp g (P c l)) rs >>= returnC . variants
|
|
||||||
R r -> maybe
|
|
||||||
(prtRaise (prt t' ++ ": no value for label") l) (comp g . snd) $
|
|
||||||
lookup l r
|
|
||||||
|
|
||||||
ExtR a (R b) -> case lookup l b of ----comp g (P (R b) l) of
|
|
||||||
Just (_,v) -> comp g v
|
|
||||||
_ -> comp g (P a l)
|
|
||||||
|
|
||||||
S (T i cs) e -> prawitz g i (flip P l) cs e
|
|
||||||
|
|
||||||
_ -> returnC $ P t' l
|
|
||||||
|
|
||||||
S t@(T _ cc) v -> do
|
|
||||||
v' <- comp g v
|
|
||||||
case v' of
|
|
||||||
FV vs -> do
|
|
||||||
ts' <- mapM (comp g . S t) vs
|
|
||||||
return $ variants ts'
|
|
||||||
_ -> case matchPattern cc v' of
|
|
||||||
Ok (c,g') -> comp (g' ++ g) c
|
|
||||||
_ | isCan v' -> prtRaise ("missing case" +++ prt v' +++ "in") t
|
|
||||||
_ -> do
|
|
||||||
t' <- comp g t
|
|
||||||
return $ S t' v' -- if v' is not canonical
|
|
||||||
|
|
||||||
S t v -> do
|
|
||||||
t' <- comp g t
|
|
||||||
v' <- comp g v
|
|
||||||
case t' of
|
|
||||||
T _ [(PV IW,c)] -> comp g c --- an optimization
|
|
||||||
T _ [(PT _ (PV IW),c)] -> comp g c
|
|
||||||
|
|
||||||
T _ [(PV z,c)] -> comp (ext z v' g) c --- another optimization
|
|
||||||
T _ [(PT _ (PV z),c)] -> comp (ext z v' g) c
|
|
||||||
|
|
||||||
FV ccs -> mapM (\c -> comp g (S c v')) ccs >>= returnC . variants
|
|
||||||
|
|
||||||
V ptyp ts -> do
|
|
||||||
vs <- stmErr $ allParamValues gr ptyp
|
|
||||||
ps <- stmErr $ mapM term2patt vs
|
|
||||||
let cc = zip ps ts
|
|
||||||
case v' of
|
|
||||||
FV vs -> mapM (\c -> comp g (S t' c)) vs >>= returnC . variants
|
|
||||||
_ -> case matchPattern cc v' of
|
|
||||||
Ok (c,g') -> comp (g' ++ g) c
|
|
||||||
_ | isCan v' -> prtRaise ("missing case" +++ prt v' +++ "in") t
|
|
||||||
_ -> return $ S t' v' -- if v' is not canonical
|
|
||||||
|
|
||||||
T _ cc -> case v' of
|
|
||||||
FV vs -> mapM (\c -> comp g (S t' c)) vs >>= returnC . variants
|
|
||||||
_ -> case matchPattern cc v' of
|
|
||||||
Ok (c,g') -> comp (g' ++ g) c
|
|
||||||
_ | isCan v' -> prtRaise ("missing case" +++ prt v' +++ "in") t
|
|
||||||
_ -> return $ S t' v' -- if v' is not canonical
|
|
||||||
|
|
||||||
Alias _ _ d -> comp g (S d v')
|
|
||||||
|
|
||||||
S (T i cs) e -> prawitz g i (flip S v') cs e
|
|
||||||
|
|
||||||
_ -> returnC $ S t' v'
|
|
||||||
|
|
||||||
-- normalize away empty tokens
|
|
||||||
K "" -> return Empty
|
|
||||||
|
|
||||||
-- glue if you can
|
|
||||||
Glue x0 y0 -> do
|
|
||||||
x <- comp g x0
|
|
||||||
y <- comp g y0
|
|
||||||
case (x,y) of
|
|
||||||
(Alias _ _ d, y) -> comp g $ Glue d y
|
|
||||||
(x, Alias _ _ d) -> comp g $ Glue x d
|
|
||||||
|
|
||||||
(S (T i cs) e, s) -> prawitz g i (flip Glue s) cs e
|
|
||||||
(s, S (T i cs) e) -> prawitz g i (Glue s) cs e
|
|
||||||
(_,Empty) -> return x
|
|
||||||
(Empty,_) -> return y
|
|
||||||
(K a, K b) -> return $ K (a ++ b)
|
|
||||||
(_, Alts (d,vs)) -> do
|
|
||||||
---- (K a, Alts (d,vs)) -> do
|
|
||||||
let glx = Glue x
|
|
||||||
comp g $ Alts (glx d, [(glx v,c) | (v,c) <- vs])
|
|
||||||
(Alts _, ka) -> checks [do
|
|
||||||
y' <- stmErr $ strsFromTerm ka
|
|
||||||
---- (Alts _, K a) -> checks [do
|
|
||||||
x' <- stmErr $ strsFromTerm x -- this may fail when compiling opers
|
|
||||||
return $ variants [
|
|
||||||
foldr1 C (map K (str2strings (glueStr v u))) | v <- x', u <- y']
|
|
||||||
---- foldr1 C (map K (str2strings (glueStr v (str a)))) | v <- x']
|
|
||||||
,return $ Glue x y
|
|
||||||
]
|
|
||||||
(FV ks,_) -> do
|
|
||||||
kys <- mapM (comp g . flip Glue y) ks
|
|
||||||
return $ variants kys
|
|
||||||
(_,FV ks) -> do
|
|
||||||
xks <- mapM (comp g . Glue x) ks
|
|
||||||
return $ variants xks
|
|
||||||
|
|
||||||
_ -> do
|
|
||||||
mapM_ checkNoArgVars [x,y]
|
|
||||||
r <- composOp (comp g) t
|
|
||||||
returnC r
|
|
||||||
|
|
||||||
Alts _ -> do
|
|
||||||
r <- composOp (comp g) t
|
|
||||||
returnC r
|
|
||||||
|
|
||||||
-- remove empty
|
|
||||||
C a b -> do
|
|
||||||
a' <- comp g a
|
|
||||||
b' <- comp g b
|
|
||||||
case (a',b') of
|
|
||||||
(Alts _, K a) -> checks [do
|
|
||||||
as <- stmErr $ strsFromTerm a' -- this may fail when compiling opers
|
|
||||||
return $ variants [
|
|
||||||
foldr1 C (map K (str2strings (plusStr v (str a)))) | v <- as]
|
|
||||||
,
|
|
||||||
return $ C a' b'
|
|
||||||
]
|
|
||||||
(Empty,_) -> returnC b'
|
|
||||||
(_,Empty) -> returnC a'
|
|
||||||
_ -> returnC $ C a' b'
|
|
||||||
|
|
||||||
-- reduce free variation as much as you can
|
|
||||||
FV ts -> mapM (comp g) ts >>= returnC . variants
|
|
||||||
|
|
||||||
-- merge record extensions if you can
ExtR r s -> do
  r' <- comp g r
  s' <- comp g s
  case (r',s') of
    -- an alias on either side is unfolded to its definition and the record
    -- extension is retried; the right-hand case must rebuild an ExtR as well
    -- (it used to build a Glue, i.e. a token gluing instead of an extension)
    (Alias _ _ d, _) -> comp g $ ExtR d s'
    (_, Alias _ _ d) -> comp g $ ExtR r' d

    -- two explicit records, or two record types, are merged into one
    (R rs, R ss) -> stmErr $ plusRecord r' s'
    (RecType rs, RecType ss) -> stmErr $ plusRecType r' s'
    -- otherwise the extension stays symbolic
    _ -> return $ ExtR r' s'
|
|
||||||
|
|
||||||
-- case-expand tables
|
|
||||||
-- if already expanded, don't expand again
|
|
||||||
T i@(TComp _) cs -> do
|
|
||||||
-- if there are no variables, don't even go inside
|
|
||||||
cs' <- if (null g) then return cs else mapPairsM (comp g) cs
|
|
||||||
return $ T i cs'
|
|
||||||
|
|
||||||
--- this means some extra work; should implement TSh directly
|
|
||||||
TSh i cs -> comp g $ T i [(p,v) | (ps,v) <- cs, p <- ps]
|
|
||||||
|
|
||||||
T i cs -> do
|
|
||||||
pty0 <- stmErr $ getTableType i
|
|
||||||
ptyp <- comp g pty0
|
|
||||||
case allParamValues gr ptyp of
|
|
||||||
Ok vs -> do
|
|
||||||
|
|
||||||
cs' <- mapM (compBranchOpt g) cs
|
|
||||||
sts <- stmErr $ mapM (matchPattern cs') vs
|
|
||||||
ts <- mapM (\ (c,g') -> comp (g' ++ g) c) sts
|
|
||||||
ps <- stmErr $ mapM term2patt vs
|
|
||||||
let ps' = ps --- PT ptyp (head ps) : tail ps
|
|
||||||
return $ --- V ptyp ts -- to save space, just course of values
|
|
||||||
T (TComp ptyp) (zip ps' ts)
|
|
||||||
_ -> do
|
|
||||||
cs' <- mapM (compBranch g) cs
|
|
||||||
return $ T i cs' -- happens with variable types
|
|
||||||
|
|
||||||
-- otherwise go ahead
|
|
||||||
_ -> composOp (comp g) t >>= returnC
|
|
||||||
|
|
||||||
-- Look up the definition of a resource constant. It is computed (in an empty
-- context) only when its kind is 0 and its module does not ask for "noexpand";
-- otherwise it is returned as found. A failed lookup is raised as an error.
lookRes (p,c) = case lookupResDefKind gr p c of
  Bad msg -> raise msg
  Ok (def,kind)
    | noExpand p -> return def
    | kind == 0  -> comp [] def
    | otherwise  -> return def
|
|
||||||
|
|
||||||
-- True when module @p@ sets the optimizer flag to "noexpand";
-- False otherwise, including when the module lookup fails.
noExpand p = errVal False $ do
  m <- lookupModule gr p
  return (getOptVal (iOpts (flags m)) useOptimizer == Just "noexpand")
|
|
||||||
|
|
||||||
-- raise an error whose message is @s@ followed by the printed term @t@
prtRaise s t = raise (s +++ prt t)
|
|
||||||
|
|
||||||
-- extend context @g@ with the binding of @x@ to @a@; the new binding shadows older ones in 'lookup'
ext x a g = (x,a):g
|
|
||||||
|
|
||||||
-- currently plain 'return'; marking the result as computed is commented out
returnC = return --- . computed
|
|
||||||
|
|
||||||
-- Build free variation from a list of terms after removing duplicates;
-- a single remaining term is returned as it is.
variants ts = pick (nub ts)
 where
  pick [t]      = t
  pick distinct = FV distinct
|
|
||||||
|
|
||||||
-- Is the term canonical: built only from constructors, applications of
-- canonical terms, and records whose field values are canonical?
isCan (Con _)   = True
isCan (QC _ _)  = True
isCan (App f a) = isCan f && isCan a
isCan (R rs)    = all (isCan . snd . snd) rs
isCan _         = False
|
|
||||||
|
|
||||||
-- Compute the body of one table branch, with the pattern's variables bound to
-- themselves in front of the context; the pattern is kept unchanged.
compBranch g (p,v) = liftM ((,) p) (comp (contP p ++ g) v)
|
|
||||||
|
|
||||||
-- Like 'compBranch', but a branch whose pattern binds no variables is
-- returned without being computed.
compBranchOpt g c@(p,_)
  | null (contP p) = return c
  | otherwise      = compBranch g c
|
|
||||||
|
|
||||||
-- The context contributed by a pattern: each variable it contains, bound to itself.
contP (PV x)      = [(x,Vr x)]
contP (PC _ ps)   = concatMap contP ps
contP (PP _ _ ps) = concatMap contP ps
contP (PT _ p)    = contP p
contP (PR rs)     = concatMap (contP . snd) rs
contP (PAs x p)   = (x,Vr x) : contP p
contP (PSeq p q)  = contP p ++ contP q
contP (PAlt p q)  = contP p ++ contP q
contP (PRep p)    = contP p
contP (PNeg p)    = contP p
contP _           = []
|
|
||||||
|
|
||||||
-- Push the term constructor @f@ inside every branch of a table selection:
-- each branch value @v@ becomes the computed @f v@, and the selection on @e@
-- is rebuilt around the new branches.
prawitz g i f cs e = liftM rebuild (mapM pushIn cs)
 where
  pushIn (p,v) = compBranch g (p, f v)
  rebuild cs'  = S (T i cs') e
|
|
||||||
|
|
||||||
-- | Argument variables cannot be glued: fail with 'glueErrorMsg' as soon as an
-- @IA@ or @IAV@ variable is found; other terms are traversed with 'composOp'.
checkNoArgVars :: Term -> STM EEnv Term
checkNoArgVars t = case t of
  Vr (IA _)  -> refuse
  Vr (IAV _) -> refuse
  _          -> composOp checkNoArgVars t
 where
  refuse = raise (glueErrorMsg (prt t))
|
|
||||||
|
|
||||||
-- | Error text raised by 'checkNoArgVars'; @s@ is the printed offending variable.
glueErrorMsg s =
|
|
||||||
"Cannot glue (+) term with run-time variable" +++ s ++ "." ++++
|
|
||||||
"Use Prelude.bind instead."
|
|
||||||
|
|
||||||
-- | Lift an 'Err' computation into 'STM': the state is passed through
-- unchanged and the 'Err' outcome decides success or failure.
stmErr :: Err a -> STM s a
stmErr e = stm $ \s -> liftM (\v -> (v,s)) e
|
|
||||||
|
|
||||||
-- | Run a computation and, if it fails, put @msg@ on a line before the error text.
evalIn :: String -> STM s a -> STM s a
evalIn msg st = stm (tag . appSTM st)
 where
  tag (Bad e) = Bad (msg ++++ e)
  tag ok      = ok
|
|
||||||
@@ -1,273 +0,0 @@
|
|||||||
----------------------------------------------------------------------
|
|
||||||
-- |
|
|
||||||
-- Module : Optimize
|
|
||||||
-- Maintainer : AR
|
|
||||||
-- Stability : (stable)
|
|
||||||
-- Portability : (portable)
|
|
||||||
--
|
|
||||||
-- > CVS $Date: 2005/09/16 13:56:13 $
|
|
||||||
-- > CVS $Author: aarne $
|
|
||||||
-- > CVS $Revision: 1.18 $
|
|
||||||
--
|
|
||||||
-- Top-level partial evaluation for GF source modules.
|
|
||||||
-----------------------------------------------------------------------------
|
|
||||||
|
|
||||||
module GF.Compile.Optimize (optimizeModule) where
|
|
||||||
|
|
||||||
import GF.Grammar.Grammar
|
|
||||||
import GF.Infra.Ident
|
|
||||||
import GF.Infra.Modules
|
|
||||||
import GF.Grammar.PrGrammar
|
|
||||||
import GF.Grammar.Macros
|
|
||||||
import GF.Grammar.Lookup
|
|
||||||
import GF.Grammar.Refresh
|
|
||||||
import GF.Grammar.Compute
|
|
||||||
import GF.Compile.BackOpt
|
|
||||||
import GF.Compile.CheckGrammar
|
|
||||||
import GF.Compile.Update
|
|
||||||
|
|
||||||
import GF.Compile.Evaluate
|
|
||||||
|
|
||||||
import GF.Data.Operations
|
|
||||||
import GF.Infra.CheckM
|
|
||||||
import GF.Infra.Option
|
|
||||||
|
|
||||||
import Control.Monad
|
|
||||||
import Data.List
|
|
||||||
|
|
||||||
-- | Partial evaluation of concrete syntax. AR 6\/2001 -- 16\/5\/2003 -- 5\/2\/2005.
-- Every module goes through 'evalModule'. A complete resource module is then
-- also passed to 'shareModule', with the strategy named by the optimizer flag
-- ("all" when the flag is not set); unknown names mean no further optimization.
optimizeModule :: Options -> [(Ident,SourceModule)] -> (Ident,SourceModule) ->
                  Err (Ident,SourceModule)
optimizeModule opts ms mo@(_,mi) = case mi of
  m0@(Module _ st _ _ _ _)
    | st == MSComplete && isModRes m0 -> liftM share (evalModule oopts ms mo)
  _ -> evalModule oopts ms mo
 where
  -- module flags are added to the options given by the caller
  oopts = addOptions opts (iOpts (flagsModule mo))
  optim = maybe "all" id $ getOptVal oopts useOptimizer

  share mo1 = case optim of
    "parametrize" -> shareModule paramOpt mo1 -- parametrization and sharing
    "values"      -> shareModule valOpt mo1   -- tables as courses-of-values
    "share"       -> shareModule shareOpt mo1 -- sharing of branches
    "all"         -> shareModule allOpt mo1   -- first parametrize then values
    _             -> mo1                      -- "none" and anything else
|
|
||||||
|
|
||||||
-- | Evaluate the judgements of a complete concrete-syntax module: first
-- 'appEvalConcrete' on the whole judgement tree, then 'evalCncInfo' on each
-- judgement. Every other module is returned unchanged. Resource modules used
-- to be evaluated here through 'evalOp'; that branch is disabled, which leaves
-- 'gr0' and 'evalOp' unreferenced.
evalModule :: Options -> [(Ident,SourceModule)] -> (Ident,SourceModule) -> Err (Ident,SourceModule)
evalModule oopts ms mo@(name,mod) = case mod of
  Module mt@(MTConcrete a) st fs me ops js
    | st == MSComplete -> do
        js0 <- appEvalConcrete gr js
        js' <- mapMTree (evalCncInfo oopts gr name a) js0 ---- <- gr0 6/12/2005
        return (name, Module mt st fs me ops js')
  _ -> return (name,mod)
 where
  -- the grammar without / with the module being evaluated
  gr0 = MGrammar ms
  gr  = MGrammar ((name,mod) : ms)

  -- evaluate one operation of the first module of g and store the result back
  evalOp g@(MGrammar ((_, m) : _)) i = do
    info  <- lookupTree prt i (jments m)
    info' <- evalResInfo oopts gr (i,info)
    return (updateRes g name i info')
|
|
||||||
|
|
||||||
-- | Only operations need be compiled in a resource, and this is local to each
-- definition since the module is traversed in topological order.
-- A given operation definition is computed unless the optimizer flag is
-- "noexpand"; every other judgement is returned unchanged.
evalResInfo :: Options -> SourceGrammar -> (Ident,Info) -> Err Info
evalResInfo oopts gr (c,info) = case info of
  ResOper pty (Yes de)
    | optres -> eIn "operation" $ liftM (ResOper pty . yes) (comp de)
  ResOper pty pde -> eIn "operation" $ return (ResOper pty pde)
  _ -> return info
 where
  comp    = if optres then computeConcrete gr else computeConcreteRec gr
  eIn cat = errIn ("Error optimizing" +++ cat +++ prt c +++ ":")
  optim   = maybe "all" id $ getOptVal oopts useOptimizer
  optres  = optim /= "noexpand"
|
|
||||||
|
|
||||||
|
|
||||||
-- | Optimize one judgement of a concrete syntax.
--
-- * category: partially evaluate the given lindef, or build a default one with
--   'mkLinDefault' when only the lincat is given; compute the printname
-- * typed function: compute the printname only, the linearization is kept as it is
-- * anything else: returned unchanged
evalCncInfo ::
  Options -> SourceGrammar -> Ident -> Ident -> (Ident,Info) -> Err (Ident,Info)
evalCncInfo opts gr cnc abs (c,info) = errIn ("optimizing" +++ prt c) $ case info of

  CncCat ptyp pde ppr -> do
    pde' <- linDefault ptyp pde
    ppr' <- liftM yes $ evalPrintname gr c ppr (yes $ K $ prt c)
    return (c, CncCat ptyp pde' ppr')

  CncFun (mt@(Just (_,ty@(cont,val)))) pde ppr ->
    eIn ("linearization in type" +++ prt (mkProd (cont,val,[])) ++++ "of function") $ do
      ppr' <- liftM yes $ evalPrintname gr c ppr pde
      return (c, CncFun mt pde ppr') -- only cat in type actually needed

  _ -> return (c,info)
 where
  eIn cat = errIn ("Error optimizing" +++ cat +++ prt c +++ ":")

  -- a lindef takes a single string argument
  strArg = [(varStr, typeStr)]

  linDefault (Yes typ) (Yes de) = liftM yes $ partEval opts gr (strArg, typ) de
  linDefault (Yes typ) Nope     =
    liftM yes $ mkLinDefault gr typ >>= partEval noOptions gr (strArg, typ)
  linDefault (May b)   Nope     = return $ May b
  linDefault _         pde      = return pde -- indirection
|
|
||||||
|
|
||||||
-- | The main function for compiling linearizations: apply the term to its
-- context variables, record-expand and compute it, optionally (option
-- @showAll@) wrap it in one table over all parameter projections it uses, and
-- abstract over the context variables again.
partEval :: Options -> SourceGrammar -> (Context,Type) -> Term -> Err Term
partEval opts gr (context, val) trm = errIn ("parteval" +++ prt_ trm) $ do
  computed <- etaExpand (mkApp trm (map Vr vars)) >>= comp env
  body     <- if globalTable then outCase env computed else return computed
  return $ mkAbs vars body

 where

  vars = map fst context
  -- every context variable is bound to itself during computation
  env  = [(v, Vr v) | v <- vars]

  globalTable = oElem showAll opts --- i -all

  comp g t = {- refreshTerm t >>= -} computeTerm gr g t

  etaExpand t = recordExpand val t --- >>= caseEx -- done by comp

  -- turn the term into a single-branch table over the parameter projections
  -- occurring in it, selected with the record of those projections
  outCase g t = do
    pts <- getParams context
    let (projs,ptyps) = unzip [pt | pt@(proj,_) <- pts, occur proj t]
    if null projs
      then return t
      else do
        let pvars = map (Vr . zIdent . prt) projs -- gets eliminated
        patt <- term2patt (R (tuple2record pvars))
        let branch = replace (zip projs pvars) t
        table <- comp g (T (TTyped (RecType (tuple2recordType ptyps))) [(patt, branch)])
        return (S table (R (tuple2record projs)))

  --- notice: this assumes that all lin types follow the "old JFP style"
  getParams ctx = liftM concat (mapM getParam ctx)

  -- the non-linearization fields of an argument, as projections with their types
  getParam (argv,RecType rs) = return
    [(P (Vr argv) lab, ptyp) | (lab,ptyp) <- rs, not (isLinLabel lab)]
  ---getParam (_,ty) | ty==typeStr = return [] --- in lindef
  getParam (av,ty) =
    Bad ("record type expected not" +++ prt ty +++ "for" +++ prt av)
|
|
||||||
--- all lin types are rec types
|
|
||||||
|
|
||||||
-- Replace projections by the terms associated with them in @reps@;
-- projections without an entry are kept, other terms are traversed.
replace :: [(Term,Term)] -> Term -> Term
replace reps trm@(P _ _) = case lookup trm reps of
  Just new -> new
  Nothing  -> trm
replace reps trm = composSafeOp (replace reps) trm
|
|
||||||
|
|
||||||
-- Does the projection @t@ occur in the term? A projection is compared as a
-- whole; only the term forms listed here are searched, all others count as
-- "does not occur".
occur t trm@(P _ _)         = t == trm
occur t (S x y)             = occur t y || occur t x
occur t (App f x)           = occur t x || occur t f
occur t (Abs _ f)           = occur t f
occur t (R rs)              = any (occur t . snd . snd) rs
occur t (T _ cs)            = any (occur t . snd) cs
occur t (C x y)             = occur t x || occur t y
occur t (Glue x y)          = occur t x || occur t y
occur t (ExtR x y)          = occur t x || occur t y
occur t (FV ts)             = any (occur t) ts
occur t (V _ ts)            = any (occur t) ts
occur t (Let (_,(_,x)) y)   = occur t x || occur t y
occur _ _                   = False
|
|
||||||
|
|
||||||
|
|
||||||
-- here we must be careful not to reduce
|
|
||||||
-- variants {{s = "Auto" ; g = N} ; {s = "Wagen" ; g = M}}
|
|
||||||
-- into {s = variants {"Auto" ; "Wagen"} ; g = variants {N ; M}} ;
|
|
||||||
|
|
||||||
-- Eta-expand a term of record type into an explicit record of projections.
-- Free variation is expanded variant by variant, so the variants stay whole
-- records; terms of non-record type are returned unchanged.
recordExpand :: Type -> Term -> Err Term
recordExpand typ trm = return $ case unComputed typ of
  RecType tys -> case trm of
    FV rs -> FV (map (expand tys) rs)
    _     -> expand tys trm
  _ -> trm
 where
  expand tys r = R [assign lab (P r lab) | (lab,_) <- tys]
|
|
||||||
|
|
||||||
|
|
||||||
-- | auxiliaries for compiling the resource
|
|
||||||
|
|
||||||
-- | Build the default linearization for a record linearization type: a
-- function of the string variable 'varStr' returning a record with a default
-- value in every field. Fails for a non-record type or an unsupported field type.
mkLinDefault :: SourceGrammar -> Type -> Err Term
mkLinDefault gr typ = case unComputed typ of
  RecType lts -> liftM (Abs varStr . R . mkAssign) (mapPairsM mkDefField lts)
  _ -> prtBad "linearization type must be a record type, not" typ
 where
  -- default value of one field type
  mkDefField typ = case unComputed typ of
    -- table: the default of the value type under a wildcard table
    Table p t -> do
      t' <- mkDefField t
      let T _ cs = mkWildCases t'
      return $ T (TWild p) cs
    -- string: the argument variable itself
    Sort "Str" -> return $ Vr varStr
    -- parameter type: the value found by 'lookupFirstTag'
    QC q p -> lookupFirstTag gr q p
    -- record: field-wise defaults
    RecType r -> do
      let (ls,ts) = unzip r
      liftM (R . zipWith assign ls) (mapM mkDefField ts)
    _ | isTypeInts typ -> return $ EInt 0 -- exists in all as first val
    _ -> prtBad "linearization type field cannot be" typ
|
|
||||||
|
|
||||||
-- | Form the printname: if one is given, compute it. Otherwise use a cleaned
-- printout of one branch of the given linearization, or the printed
-- identifier when there is no linearization either.
--- We cannot use linearization at this stage, since we do not know the
--- defaults we would need for question marks - and we're not yet in canon.
evalPrintname :: SourceGrammar -> Ident -> MPr -> Perh Term -> Err Term
evalPrintname gr c ppr lin = case (ppr, lin) of
  (Yes pr, _) -> computeConcrete gr pr
  (_, Yes t)  -> return $ K $ clean $ prt $ oneBranch t ---- stringFromTerm
  _           -> return $ K $ prt c ----
 where

  -- follow the first alternative of every record, table and variation
  oneBranch (Abs _ b)      = oneBranch b
  oneBranch (R (r:_))      = oneBranch $ snd $ snd r
  oneBranch (T _ (b:_))    = oneBranch $ snd b
  oneBranch (V _ (b:_))    = oneBranch b
  oneBranch (FV (t:_))     = oneBranch t
  oneBranch (C x y)        = C (oneBranch x) (oneBranch y)
  oneBranch (S x _)        = oneBranch x
  oneBranch (P x _)        = oneBranch x
  oneBranch (Alts (d,_))   = oneBranch d
  oneBranch t              = t

  --- very unclean cleaner: drops every "++ " and every double quote
  clean ('+':'+':' ':cs) = clean cs
  clean ('"':cs)         = clean cs
  clean (ch:cs)          = ch : clean cs
  clean []               = []
|
|
||||||
|
|
||||||
@@ -1,119 +0,0 @@
|
|||||||
%define name GF
|
|
||||||
%define version 3.0
|
|
||||||
%define release 1
|
|
||||||
|
|
||||||
Name: %{name}
|
|
||||||
Summary: Grammatical Framework
|
|
||||||
Version: %{version}
|
|
||||||
Release: %{release}
|
|
||||||
License: GPL
|
|
||||||
Group: Sciences/Other
|
|
||||||
Vendor: The Language Technology Group
|
|
||||||
URL: http://www.cs.chalmers.se/~aarne/GF/
|
|
||||||
Source: GF-%{version}.tgz
|
|
||||||
BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-buildroot
|
|
||||||
BuildRequires: ghc
|
|
||||||
|
|
||||||
%description
|
|
||||||
The Grammatical Framework (=GF) is a grammar formalism based on type theory.
|
|
||||||
It consists of
|
|
||||||
|
|
||||||
* a special-purpose programming language
|
|
||||||
* a compiler of the language
|
|
||||||
* a generic grammar processor
|
|
||||||
|
|
||||||
The compiler reads GF grammars from user-provided files, and the
|
|
||||||
generic grammar processor performs various tasks with the grammars:
|
|
||||||
|
|
||||||
* generation
|
|
||||||
* parsing
|
|
||||||
* translation
|
|
||||||
* type checking
|
|
||||||
* computation
|
|
||||||
* paraphrasing
|
|
||||||
* random generation
|
|
||||||
* syntax editing
|
|
||||||
|
|
||||||
GF particularly addresses the following aspects of grammars:
|
|
||||||
|
|
||||||
* multilinguality (parallel grammars for different languages)
|
|
||||||
* semantics (semantic conditions of well-formedness, semantic
|
|
||||||
properties of expressions)
|
|
||||||
* grammar engineering (modularity, information hiding, reusable
|
|
||||||
libraries)
|
|
||||||
|
|
||||||
|
|
||||||
%package editor
|
|
||||||
Summary: Java syntax editor for Grammatical Framework (GF).
|
|
||||||
Group: Sciences/Other
|
|
||||||
Requires: %{name}
|
|
||||||
|
|
||||||
%description editor
|
|
||||||
This package contains the syntax editor GUI for GF.
|
|
||||||
|
|
||||||
%package editor2
|
|
||||||
Summary: Java syntax editor for Grammatical Framework (GF).
|
|
||||||
Group: Sciences/Other
|
|
||||||
Requires: %{name}
|
|
||||||
|
|
||||||
%description editor2
|
|
||||||
This package contains the syntax editor GUI for GF with printname enhancements and HTML support.
|
|
||||||
|
|
||||||
|
|
||||||
%prep
# remove any previous build root; quoted so a path with spaces stays one argument
rm -rf "$RPM_BUILD_ROOT"
%setup -q
|
|
||||||
|
|
||||||
%build
|
|
||||||
cd src
|
|
||||||
%configure
|
|
||||||
make all
|
|
||||||
|
|
||||||
%install
|
|
||||||
cd src
|
|
||||||
%makeinstall
|
|
||||||
|
|
||||||
%clean
# remove the build root; quoted so a path with spaces stays one argument
rm -rf "$RPM_BUILD_ROOT"
|
|
||||||
|
|
||||||
%files
|
|
||||||
%defattr(-,root,root,0755)
|
|
||||||
%{_bindir}/gf
|
|
||||||
%{_bindir}/gfdoc
|
|
||||||
%doc LICENSE README doc/{DocGF.pdf,gf2-highlights.html,index.html}
|
|
||||||
|
|
||||||
%files editor
|
|
||||||
%defattr(-,root,root,0755)
|
|
||||||
%{_bindir}/jgf
|
|
||||||
%{_datadir}/%{name}-%{version}/gf-java.jar
|
|
||||||
|
|
||||||
%files editor2
|
|
||||||
%defattr(-,root,root,0755)
|
|
||||||
%{_bindir}/gfeditor
|
|
||||||
%{_datadir}/%{name}-%{version}/gfeditor.jar
|
|
||||||
|
|
||||||
|
|
||||||
%changelog
|
|
||||||
* Tue Jun 21 2005 Hans-Joachim Daniels <daniels@ira.uka.de> 2.3pre
|
|
||||||
- added the printnames and HTML enhanced editor as editor2
|
|
||||||
|
|
||||||
* Thu May 12 2005 Bjorn Bringert <bringert@cs.chalmers.se> 2.2pre2-1
|
|
||||||
- Split package into gf and gf-editor packages.
|
|
||||||
|
|
||||||
* Wed May 11 2005 Bjorn Bringert <bringert@cs.chalmers.se> 2.2pre1-1
|
|
||||||
- Release of GF 2.2
|
|
||||||
|
|
||||||
* Mon Nov 8 2004 Aarne Ranta <aarne@cs.chalmers.se> 2.1-1
|
|
||||||
- Release of GF 2.1
|
|
||||||
|
|
||||||
* Thu Jun 24 2004 Bjorn Bringert <bringert@cs.chalmers.se> 2.0-2
|
|
||||||
- Set ownership correctly.
|
|
||||||
- Move jar-file to share (thanks to Anders Carlsson for pointing this out.)
|
|
||||||
- Added vendor tag.
|
|
||||||
|
|
||||||
* Tue Jun 22 2004 Bjorn Bringert <bringert@cs.chalmers.se> 2.0-1
|
|
||||||
- Include gfdoc binary
|
|
||||||
|
|
||||||
* Mon Jun 21 2004 Bjorn Bringert <bringert@cs.chalmers.se> 2.0-1
|
|
||||||
- Initial packaging
|
|
||||||
|
|
||||||
@@ -1,63 +0,0 @@
|
|||||||
<?xml version="1.0"?>
|
|
||||||
<Wix xmlns="http://schemas.microsoft.com/wix/2003/01/wi">
|
|
||||||
<Product Id="4717AF5D-52AC-4D13-85E6-D87278CE9BBC"
|
|
||||||
UpgradeCode="0BB7BB08-1A79-4981-A03F-32B401B01010"
|
|
||||||
Name="Grammatical Framework, version @PACKAGE_VERSION@"
|
|
||||||
Language="1033" Version="2.2" Manufacturer="The GF Developers">
|
|
||||||
<Package Id="????????-????-????-????-????????????"
|
|
||||||
Description="Grammatical Framework, version @PACKAGE_VERSION@"
|
|
||||||
Comments="This package contains the Grammatical Framework system, version @PACKAGE_VERSION@."
|
|
||||||
InstallerVersion="200" Compressed="yes" />
|
|
||||||
|
|
||||||
<Media Id="1" Cabinet="gf.cab" EmbedCab="yes" />
|
|
||||||
|
|
||||||
<Directory Id="TARGETDIR" Name="SourceDir">
|
|
||||||
|
|
||||||
<Directory Id="ProgramFilesFolder">
|
|
||||||
<Directory Id="INSTALLDIR" Name="GF-@PACKAGE_VERSION@">
|
|
||||||
|
|
||||||
<Component Id="GFBinary" Guid="E2A44A6C-0252-4346-85AE-BC6A16BFB0FC" DiskId="1">
|
|
||||||
<File Id="GFEXE" Name="gf.exe" src="../bin/gf.exe" />
|
|
||||||
<Shortcut Id="GFStartMenu" Directory="GFProgramMenuDir"
|
|
||||||
Name="GF" Target="[!GFEXE]" />
|
|
||||||
</Component>
|
|
||||||
|
|
||||||
<Component Id="GFDocBinary" Guid="BDCA6F34-EE0A-4E72-8D00-CB7CAF3CEAEA" DiskId="1">
|
|
||||||
<File Id="GFDocEXE" Name="gfdoc.exe" src="tools/gfdoc.exe" />
|
|
||||||
</Component>
|
|
||||||
|
|
||||||
<Component Id="GFEditor" Guid="39F885F7-BC49-4CBC-9DCD-569C95AA3364" DiskId="1">
|
|
||||||
<Environment Id="GFHomeEnv" Name="GF_HOME" Action="create" Part="all"
|
|
||||||
Permanent="no" Value="[INSTALLDIR]" />
|
|
||||||
<File Id="GFEditorBat" Name="jgf.bat" src="jgf.bat" />
|
|
||||||
<File Id="GFEditorJar" Name="gf-java.jar" src="JavaGUI/gf-java.jar" />
|
|
||||||
<Shortcut Id="GFEditorStartMenu" Directory="GFProgramMenuDir"
|
|
||||||
Name="GFEditor" LongName="GF Editor" Target="[!GFEditorBat]"
|
|
||||||
WorkingDirectory="INSTALLDIR" />
|
|
||||||
</Component>
|
|
||||||
|
|
||||||
<Directory Id="GFDocDir" Name="doc">
|
|
||||||
<Component Id="GFDoc" Guid="23BEEBBF-F9AB-459F-B8D2-8414BB47834A" DiskId="1">
|
|
||||||
<File Id="GFReadme" Name="README.txt" src="../README" />
|
|
||||||
<File Id="GFLicenee" Name="LICENSE.txt" src="../LICENSE" />
|
|
||||||
</Component>
|
|
||||||
</Directory>
|
|
||||||
|
|
||||||
</Directory>
|
|
||||||
</Directory>
|
|
||||||
|
|
||||||
<Directory Id="ProgramMenuFolder" Name="PMenu" LongName="Programs">
|
|
||||||
<Directory Id="GFProgramMenuDir" Name='GF-@PACKAGE_VERSION@' />
|
|
||||||
</Directory>
|
|
||||||
|
|
||||||
</Directory>
|
|
||||||
|
|
||||||
<Feature Id="ProductFeature" Title="Feature Title" Level="1">
|
|
||||||
<ComponentRef Id="GFBinary" />
|
|
||||||
<ComponentRef Id="GFDocBinary" />
|
|
||||||
<ComponentRef Id="GFEditor" />
|
|
||||||
<ComponentRef Id="GFDoc" />
|
|
||||||
</Feature>
|
|
||||||
|
|
||||||
</Product>
|
|
||||||
</Wix>
|
|
||||||
@@ -1,98 +0,0 @@
|
|||||||
# GF ATK configuration file
|
|
||||||
# ------------------------
|
|
||||||
|
|
||||||
# -- Basic audio signal processing --
|
|
||||||
|
|
||||||
SOURCEFORMAT = HAUDIO
|
|
||||||
SOURCERATE = 625
|
|
||||||
|
|
||||||
# Set in GF/System/ATKSpeechInput.hs
|
|
||||||
# TARGETKIND = MFCC_0_D_A
|
|
||||||
|
|
||||||
TARGETRATE = 100000.0
|
|
||||||
WINDOWSIZE = 250000.0
|
|
||||||
ENORMALISE = F
|
|
||||||
ZMEANSOURCE = F
|
|
||||||
USEHAMMING = T
|
|
||||||
PREEMCOEF = 0.97
|
|
||||||
USEPOWER = T
|
|
||||||
NUMCHANS = 26
|
|
||||||
CEPLIFTER = 22
|
|
||||||
NUMCEPS = 12
|
|
||||||
SILFLOOR = 50.0
|
|
||||||
USESILDET = T
|
|
||||||
MEASURESIL = F
|
|
||||||
OUTSILWARN = T
|
|
||||||
|
|
||||||
# -- Silence detection ---
|
|
||||||
|
|
||||||
HPARM: CALWINDOW = 40
|
|
||||||
HPARM: SPEECHTHRESH = 9.0
|
|
||||||
HPARM: SILDISCARD = 10.0
|
|
||||||
HPARM: SILENERGY = 0.0
|
|
||||||
HPARM: SPCSEQCOUNT = 10
|
|
||||||
HPARM: SPCGLCHCOUNT = 0
|
|
||||||
HPARM: SILGLCHCOUNT = 2
|
|
||||||
HPARM: SILSEQCOUNT = 50
|
|
||||||
|
|
||||||
# -- Cepstral mean ---
|
|
||||||
|
|
||||||
HPARM: CMNTCONST = 0.995
|
|
||||||
HPARM: CMNRESETONSTOP = F
|
|
||||||
HPARM: CMNMINFRAMES = 12
|
|
||||||
|
|
||||||
# -- Recogniser --
|
|
||||||
|
|
||||||
AREC: TRBAKFREQ = 1
|
|
||||||
|
|
||||||
# hands free, don't return results until end
|
|
||||||
AREC: RUNMODE = 01441
|
|
||||||
|
|
||||||
AREC: GENBEAM = 200.0
|
|
||||||
AREC: WORDBEAM = 175.0
|
|
||||||
AREC: WORDPEN = -10.0
|
|
||||||
|
|
||||||
HNET: FORCECXTEXP = T
|
|
||||||
HNET: ALLOWXWRDEXP = F
|
|
||||||
HNET: MARKSUBLAT = F
|
|
||||||
ARMAN: AUTOSIL = F
|
|
||||||
|
|
||||||
HREC: CONFSCALE = 0.15
|
|
||||||
HREC: CONFOFFSET = 0.0
|
|
||||||
#HREC: CONFBGHMM = bghmm
|
|
||||||
|
|
||||||
# -- Set visibility and positions of ATK controls --
|
|
||||||
|
|
||||||
AIN: DISPSHOW = T
|
|
||||||
AIN: DISPXORIGIN = 440
|
|
||||||
AIN: DISPYORIGIN = 220
|
|
||||||
AIN: DISPHEIGHT = 40
|
|
||||||
AIN: DISPWIDTH = 160
|
|
||||||
|
|
||||||
ACODE: DISPSHOW = F
|
|
||||||
ACODE: DISPXORIGIN = 40
|
|
||||||
ACODE: DISPYORIGIN = 220
|
|
||||||
ACODE: DISPHEIGHT = 220
|
|
||||||
ACODE: DISPWIDTH = 380
|
|
||||||
ACODE: MAXFGFEATS = 13
|
|
||||||
ACODE: NUMSTREAMS = 1
|
|
||||||
|
|
||||||
AREC: DISPSHOW = T
|
|
||||||
AREC: DISPXORIGIN = 40
|
|
||||||
AREC: DISPYORIGIN = 20
|
|
||||||
AREC: DISPHEIGHT = 160
|
|
||||||
AREC: DISPWIDTH = 560
|
|
||||||
|
|
||||||
|
|
||||||
# -- Debugging --
|
|
||||||
|
|
||||||
HMMSET: TRACE = 0
|
|
||||||
ADICT: TRACE = 0
|
|
||||||
AGRAM: TRACE = 0
|
|
||||||
GGRAM: TRACE = 0
|
|
||||||
AREC: TRACE = 0
|
|
||||||
ARMAN: TRACE = 0
|
|
||||||
HPARM: TRACE = 0
|
|
||||||
HNET: TRACE = 0
|
|
||||||
HREC: TRACE = 0
|
|
||||||
|
|
||||||
@@ -1,30 +0,0 @@
|
|||||||
#!/bin/sh
|
|
||||||
|
|
||||||
prefix="@prefix@"
|
|
||||||
|
|
||||||
case "@host@" in
|
|
||||||
*-cygwin)
|
|
||||||
prefix=`cygpath -w "$prefix"`;;
|
|
||||||
esac
|
|
||||||
|
|
||||||
exec_prefix="@exec_prefix@"
|
|
||||||
GF_BIN_DIR="@bindir@"
|
|
||||||
GF_DATA_DIR="@datadir@/GF-@PACKAGE_VERSION@"
|
|
||||||
|
|
||||||
GFBIN="$GF_BIN_DIR/gf"
|
|
||||||
|
|
||||||
if [ ! -x "${GFBIN}" ]; then
|
|
||||||
# Fall back to the directory this script lives in; quote $0 so that
# install paths containing whitespace are not word-split.
GF_BIN_DIR=`dirname "$0"`
|
|
||||||
GFBIN="$GF_BIN_DIR/gf"
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [ ! -x "${GFBIN}" ]; then
|
|
||||||
GFBIN=`which gf`
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [ ! -x "${GFBIN}" ]; then
|
|
||||||
echo "gf not found."
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Replace this shell with gf in batch mode, forwarding all arguments.
# $GFBIN is quoted to match the -x tests above and survive paths with spaces.
exec "$GFBIN" --batch "$@"
|
|
||||||
@@ -1,169 +0,0 @@
|
|||||||
|
|
||||||
# checking that a file is haddocky:
|
|
||||||
# - checking if it has an export list
|
|
||||||
# - if there is no export list, it tries to find all defined functions
|
|
||||||
# - checking that all exported functions have type signatures
|
|
||||||
# - checking that the module header is OK
|
|
||||||
|
|
||||||
# changes on files:
|
|
||||||
# - transforming hard space to ordinary space
|
|
||||||
|
|
||||||
# limitations:
|
|
||||||
# - there might be some problems with nested comments
|
|
||||||
# - cannot handle type signatures for several functions
|
|
||||||
# (i.e. "a, b, c :: t")
|
|
||||||
# but on the other hand -- haddock has some problems with these too...
|
|
||||||
|
|
||||||
$operChar = qr/[\!\#\$\%\&\*\+\.\/\<\=\>\?\@\\\^\|\-\~]/;
|
|
||||||
$operCharColon = qr/[\!\#\$\%\&\*\+\.\/\<\=\>\?\@\\\^\|\-\~\:]/;
|
|
||||||
$nonOperChar = qr/[^\!\#\$\%\&\*\+\.\/\<\=\>\?\@\\\^\|\-\~]/;
|
|
||||||
$nonOperCharColon = qr/[^\!\#\$\%\&\*\+\.\/\<\=\>\?\@\\\^\|\-\~\:]/;
|
|
||||||
|
|
||||||
$operSym = qr/$operChar $operCharColon*/x;
|
|
||||||
$funSym = qr/[a-z] \w* \'*/x;
|
|
||||||
$funOrOper = qr/(?: $funSym | \($operSym\) )/x;
|
|
||||||
|
|
||||||
$keyword = qr/(?: type | data | module | newtype | infix[lr]? | import | instance | class )/x;
|
|
||||||
$keyOper = qr/^(?: \.\. | \:\:? | \= | \\ | \| | \<\- | \-\> | \@ | \~ | \=\> | \. )$/x;
|
|
||||||
|
|
||||||
sub check_headerline {
|
|
||||||
my ($title, $regexp) = @_;
|
|
||||||
if (s/^-- \s $title \s* : \s+ (.+?) \s*\n//sx) {
|
|
||||||
$name = $1;
|
|
||||||
push @ERR, "Incorrect ".lcfirst $title.": $name"
|
|
||||||
unless $name =~ $regexp;
|
|
||||||
return $&;
|
|
||||||
} else {
|
|
||||||
push @ERR, "Header missing: ".lcfirst $title."";
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if ($#ARGV >= 0) {
|
|
||||||
@FILES = @ARGV;
|
|
||||||
} else {
|
|
||||||
# @dirs = qw{. api canonical cf cfgm compile for-ghc-nofud
|
|
||||||
# grammar infra notrace parsers shell
|
|
||||||
# source speech translate useGrammar util visualization
|
|
||||||
# GF GF/* GF/*/* GF/*/*/*};
|
|
||||||
@dirs = qw{GF GF/* GF/*/* GF/*/*/*};
|
|
||||||
@FILES = grep(!/\/(Par|Lex)(GF|GFC|CFG)\.hs$/,
|
|
||||||
glob "{".join(",",@dirs)."}/*.hs");
|
|
||||||
}
|
|
||||||
|
|
||||||
for $file (@FILES) {
|
|
||||||
# Strip only the trailing ".hs" extension; the pattern is anchored so a
# ".hs" occurring earlier in the path (e.g. a directory name) is left alone.
$file =~ s/\.hs$//;
|
|
||||||
|
|
||||||
open F, "<$file.hs";
|
|
||||||
$_ = join "", <F>;
|
|
||||||
close F;
|
|
||||||
|
|
||||||
@ERR = ();
|
|
||||||
|
|
||||||
# substituting hard spaces for ordinary spaces
|
|
||||||
$nchars = tr/\240/ /;
|
|
||||||
if ($nchars > 0) {
|
|
||||||
push @ERR, "!! > Substituted $nchars hard spaces";
|
|
||||||
open F, ">$file.hs";
|
|
||||||
print F $_;
|
|
||||||
close F;
|
|
||||||
}
|
|
||||||
|
|
||||||
# the module header
|
|
||||||
$hdr_module = $module = "";
|
|
||||||
|
|
||||||
s/^ \{-\# \s+ OPTIONS \s+ -cpp \s+ \#-\} //sx; # removing ghc options (cpp)
|
|
||||||
s/^ \s+ //sx; # removing initial whitespace
|
|
||||||
s/^ (--+ \s* \n) +//sx; # removing initial comment lines
|
|
||||||
unless (s/^ -- \s \| \s* \n//sx) {
|
|
||||||
push @ERR, "Incorrect module header";
|
|
||||||
} else {
|
|
||||||
$hdr_module = s/^-- \s Module \s* : \s+ (.+?) \s*\n//sx ? $1 : "";
|
|
||||||
&check_headerline("Maintainer", qr/^ [\wåäöÅÄÖüÜ\s\@\.]+ $/x);
|
|
||||||
&check_headerline("Stability", qr/.*/);
|
|
||||||
&check_headerline("Portability", qr/.*/);
|
|
||||||
s/^ (--+ \s* \n) +//sx;
|
|
||||||
push @ERR, "Missing CVS information"
|
|
||||||
unless s/^(-- \s+ \> \s+ CVS \s+ \$ .*? \$ \s* \n)+//sx;
|
|
||||||
s/^ (--+ \s* \n) +//sx;
|
|
||||||
push @ERR, "Missing module description"
|
|
||||||
unless /^ -- \s+ [^\(]/x;
|
|
||||||
}
|
|
||||||
|
|
||||||
# removing comments
|
|
||||||
s/\{- .*? -\}//gsx;
|
|
||||||
# Remove line comments: a run of two or more dashes followed either by a
# non-operator character and the rest of the line, or directly by newline.
# Uses $nonOperCharColon (the variable actually defined above); the former
# name $nonOperSymColon was undefined and interpolated as the empty string,
# so operators beginning with "--" (e.g. "-->") were stripped as comments.
# "--+" keeps dash-rule lines such as "--------" recognised as comments.
s/--+ ($nonOperCharColon .*? \n | \n)/\n/gx;
|
|
||||||
|
|
||||||
# removing \n in front of whitespace (for simplification)
|
|
||||||
s/\n+[ \t]/ /gs;
|
|
||||||
|
|
||||||
# the export list
|
|
||||||
$exportlist = "";
|
|
||||||
|
|
||||||
if (/\n module \s+ ((?: \w | \.)+) \s+ \( (.*?) \) \s+ where/sx) {
|
|
||||||
($module, $exportlist) = ($1, $2);
|
|
||||||
|
|
||||||
$exportlist =~ s/\b module \s+ [A-Z] \w*//gsx;
|
|
||||||
$exportlist =~ s/\(\.\.\)//g;
|
|
||||||
|
|
||||||
} elsif (/\n module \s+ ((?: \w | \.)+) \s+ where/sx) {
|
|
||||||
$module = $1;
|
|
||||||
|
|
||||||
# modules without export lists
|
|
||||||
# push @ERR, "No export list";
|
|
||||||
|
|
||||||
# function definitions
|
|
||||||
while (/^ (.*? $nonOperCharColon) = (?! $operCharColon)/gmx) {
|
|
||||||
$defn = $1;
|
|
||||||
next if $defn =~ /^ $keyword \b/x;
|
|
||||||
|
|
||||||
if ($defn =~ /\` ($funSym) \`/x) {
|
|
||||||
$fn = $1;
|
|
||||||
} elsif ($defn =~ /(?<! $operCharColon) ($operSym)/x
|
|
||||||
&& $1 !~ $keyOper) {
|
|
||||||
$fn = "($1)";
|
|
||||||
} elsif ($defn =~ /^($funSym)/x) {
|
|
||||||
$fn = $1;
|
|
||||||
} else {
|
|
||||||
# None of the infix / operator / prefix patterns matched this definition.
push @ERR, "!! > Error in function definition: $defn";
|
|
||||||
next;
|
|
||||||
}
|
|
||||||
|
|
||||||
$exportlist .= " $fn ";
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
push @ERR, "No module header found";
|
|
||||||
}
|
|
||||||
|
|
||||||
push @ERR, "Module names not matching: $module != $hdr_module"
|
|
||||||
if $hdr_module && $module !~ /\Q$hdr_module\E$/;
|
|
||||||
|
|
||||||
# fixing exportlist (double spaces as separator)
|
|
||||||
$exportlist = " $exportlist ";
|
|
||||||
$exportlist =~ s/(\s | \,)+/ /gx;
|
|
||||||
|
|
||||||
# removing functions with type signatures from export list
|
|
||||||
while (/^ ($funOrOper (\s* , \s* $funOrOper)*) \s* ::/gmx) {
|
|
||||||
$functionlist = $1;
|
|
||||||
while ($functionlist =~ s/^ ($funOrOper) (\s* , \s*)?//x) {
|
|
||||||
$function = $1;
|
|
||||||
$exportlist =~ s/\s \Q$function\E \s/ /gx;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
# reporting exported functions without type signatures
|
|
||||||
$reported = 0;
|
|
||||||
$untyped = "";
|
|
||||||
while ($exportlist =~ /\s ($funOrOper) \s/x) {
|
|
||||||
$function = $1;
|
|
||||||
$exportlist =~ s/\s \Q$function\E \s/ /gx;
|
|
||||||
$reported++;
|
|
||||||
$untyped .= " $function";
|
|
||||||
}
|
|
||||||
push @ERR, "No type signature for $reported function(s):\n " . $untyped
|
|
||||||
if $reported;
|
|
||||||
|
|
||||||
print "-- $file\n > " . join("\n > ", @ERR) . "\n"
|
|
||||||
if @ERR;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@@ -1,73 +0,0 @@
|
|||||||
#!/bin/tcsh
|
|
||||||
|
|
||||||
######################################################################
|
|
||||||
# Author: Peter Ljunglöf
|
|
||||||
# Time-stamp: "2005-05-12, 23:17"
|
|
||||||
# CVS $Date: 2005/05/13 12:40:20 $
|
|
||||||
# CVS $Author: peb $
|
|
||||||
#
|
|
||||||
# a script for producing documentation through Haddock
|
|
||||||
######################################################################
|
|
||||||
|
|
||||||
set basedir = `pwd`
|
|
||||||
set docdir = haddock/html
|
|
||||||
set tempdir = haddock/.temp-files
|
|
||||||
set resourcedir = haddock/resources
|
|
||||||
|
|
||||||
set files = (`find GF -name '*.hs'` GF.hs)
|
|
||||||
|
|
||||||
######################################################################
|
|
||||||
|
|
||||||
echo 1. Creating and cleaning Haddock directory
|
|
||||||
echo -- $docdir
|
|
||||||
|
|
||||||
mkdir -p $docdir
|
|
||||||
rm -rf $docdir/*
|
|
||||||
|
|
||||||
######################################################################
|
|
||||||
|
|
||||||
echo
|
|
||||||
echo 2. Copying Haskell files to temporary directory: $tempdir
|
|
||||||
|
|
||||||
rm -rf $tempdir
|
|
||||||
|
|
||||||
foreach f ($files)
|
|
||||||
# echo -- $f
|
|
||||||
mkdir -p `dirname $tempdir/$f`
|
|
||||||
perl -pe 's/^#/-- CPP #/' $f > $tempdir/$f
|
|
||||||
end
|
|
||||||
|
|
||||||
######################################################################
|
|
||||||
|
|
||||||
echo
|
|
||||||
echo 3. Invoking Haddock
|
|
||||||
|
|
||||||
cd $tempdir
|
|
||||||
haddock -o $basedir/$docdir -h -t 'Grammatical Framework' $files
|
|
||||||
cd $basedir
|
|
||||||
|
|
||||||
######################################################################
|
|
||||||
|
|
||||||
echo
|
|
||||||
echo 4. Restructuring to HTML framesets
|
|
||||||
|
|
||||||
echo -- Substituting for frame targets inside html files
|
|
||||||
mv $docdir/index.html $docdir/index-frame.html
|
|
||||||
foreach f ($docdir/*.html)
|
|
||||||
# echo -- $f
|
|
||||||
perl -pe 's/<HEAD/<HEAD><BASE TARGET="contents"/; s/"index.html"/"index-frame.html"/; s/(<A HREF = "\S*index\S*.html")/$1 TARGET="index"/' $f > .tempfile
|
|
||||||
mv .tempfile $f
|
|
||||||
end
|
|
||||||
|
|
||||||
echo -- Copying resource files:
|
|
||||||
echo -- `ls $resourcedir/*.*`
|
|
||||||
cp $resourcedir/*.* $docdir
|
|
||||||
|
|
||||||
######################################################################
|
|
||||||
|
|
||||||
echo
|
|
||||||
echo 5. Finished
|
|
||||||
echo -- The documentation is located at:
|
|
||||||
echo -- $docdir/index.html
|
|
||||||
|
|
||||||
|
|
||||||
@@ -1,10 +0,0 @@
|
|||||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
|
|
||||||
|
|
||||||
<!-- Time-stamp: "2005-02-03, 15:59" -->
|
|
||||||
<HTML>
|
|
||||||
<HEAD>
|
|
||||||
<LINK HREF="haddock.css" REL=stylesheet>
|
|
||||||
</HEAD>
|
|
||||||
<BODY>
|
|
||||||
</BODY>
|
|
||||||
</HTML>
|
|
||||||
@@ -1,14 +0,0 @@
|
|||||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Frameset//EN"
|
|
||||||
"http://www.w3.org/TR/html4/frameset.dtd">
|
|
||||||
|
|
||||||
<!-- Time-stamp: "2005-02-03, 15:53" -->
|
|
||||||
<html>
|
|
||||||
<head>
|
|
||||||
<meta http-equiv="content-type" content="text/html; charset=ISO-8859-1" />
|
|
||||||
<title>Grammatical Framework programmer's documentation</title>
|
|
||||||
</head>
|
|
||||||
<frameset cols="1*,2*">
|
|
||||||
<frame name="index" src="index-frame.html">
|
|
||||||
<frame name="contents" src="blank.html">
|
|
||||||
</frameset>
|
|
||||||
</html>
|
|
||||||
@@ -1,334 +0,0 @@
|
|||||||
<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML//EN">
|
|
||||||
<html><head><title>GF Version 2.9</title></head>
|
|
||||||
|
|
||||||
|
|
||||||
<body bgcolor="#ffffff" text="#000000">
|
|
||||||
|
|
||||||
<center>
|
|
||||||
<img src="gf-logo.gif">
|
|
||||||
|
|
||||||
<h1>Grammatical Framework</h1>
|
|
||||||
|
|
||||||
<h2>Version 2.9</h2>
|
|
||||||
|
|
||||||
December 21, 2007.
|
|
||||||
|
|
||||||
<p>
|
|
||||||
|
|
||||||
</center>
|
|
||||||
|
|
||||||
<hr>
|
|
||||||
|
|
||||||
<a href=
|
|
||||||
"doc">Documentation</a>
|
|
||||||
|
|
|
||||||
<a href=
|
|
||||||
"download/">Download</a>
|
|
||||||
|
|
|
||||||
<a href=
|
|
||||||
"doc/darcs.html">LatestCode</a>
|
|
||||||
|
|
|
||||||
<a href=
|
|
||||||
"doc/gf-quickstart.html">QuickStart</a>
|
|
||||||
|
|
|
||||||
<a href=
|
|
||||||
"doc/gf-tutorial.html">Tutorial</a>
|
|
||||||
|
|
|
||||||
<a href=
|
|
||||||
"doc/gf-refman.html">ReferenceManual</a>
|
|
||||||
|
|
|
||||||
<a href="lib/resource/doc/synopsis.html">Libraries</a>
|
|
||||||
|
|
|
||||||
<a href=
|
|
||||||
"http://www.cs.chalmers.se/~bringert/gf/translate/">NumeralDemo</a>
|
|
||||||
|
|
|
||||||
<a href=
|
|
||||||
"http://www.cs.chalmers.se/~markus/gramlets/letter-applet.html">LetterDemo</a>
|
|
||||||
|
|
||||||
|
|
||||||
<hr>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
|
|
||||||
</p><h2>News</h2>
|
|
||||||
|
|
||||||
<i>June 25, 2008</i>.
|
|
||||||
<a href="doc/gf3-release.html">GF 3.0</a>
|
|
||||||
coming soon! Version 2.9f is now frozen and no longer
|
|
||||||
available in darcs. But <a href="download/GF-2.9f.tgz">here</a> is a tarball
|
|
||||||
with the final version of 2.9 sources.
|
|
||||||
|
|
||||||
<p>
|
|
||||||
|
|
||||||
<i>March 20, 2008</i>. Ten years of GF!
|
|
||||||
<ul>
|
|
||||||
<li> <a href="doc/nancy-slides.pdf">The first public talk</a> at INRIA Nancy,
|
|
||||||
20 March 1998.
|
|
||||||
<li> <a href="doc/GF-0.1.tgz">GF Version 0.1</a> source code from XRCE Grenoble
|
|
||||||
18 March 1998 (Requires the
|
|
||||||
<a href="http://www.cs.chalmers.se/~augustss/hbc/hbc.html">HBC Haskell Compiler</a>
|
|
||||||
in "no-pedantic" mode).
|
|
||||||
</ul>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
|
|
||||||
<i>December 21, 2007</i>.
|
|
||||||
<ul>
|
|
||||||
<li> GF 2.9 is mainly a bug fix version;
|
|
||||||
<li> preview version of GF3: get the
|
|
||||||
<a href=
|
|
||||||
"http://www.cs.chalmers.se/Cs/Research/Language-technology/darcs/GF/doc/darcs.html">
|
|
||||||
latest sources</a> and compile with <tt>make gfc</tt>
|
|
||||||
<li> new version of the <a href="doc/gf-tutorial.html">tutorial</a>
|
|
||||||
<li> new <a href="doc/gf-refman.html">reference manual</a>
|
|
||||||
<li> <a href="demos/resource-api/editor.html">resource api browser</a>
|
|
||||||
using interactive editing
|
|
||||||
</ul>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
|
|
||||||
<a href="doc/old-news.html">News before 2.9</a>.
|
|
||||||
|
|
||||||
|
|
||||||
</p><h2>What is GF?</h2>
|
|
||||||
|
|
||||||
The Grammatical Framework (=GF) is a grammar formalism based on type
|
|
||||||
theory. It consists of
|
|
||||||
<ul>
|
|
||||||
<li> a special-purpose programming language
|
|
||||||
</li><li> a compiler of the language
|
|
||||||
</li><li> a generic grammar processor
|
|
||||||
</li></ul>
|
|
||||||
The compiler reads
|
|
||||||
GF grammars from user-provided files,
|
|
||||||
and the generic grammar processor performs
|
|
||||||
various tasks with the grammars:
|
|
||||||
<ul>
|
|
||||||
<li> generation
|
|
||||||
</li><li> parsing
|
|
||||||
</li><li> translation
|
|
||||||
</li><li> type checking
|
|
||||||
</li><li> computation
|
|
||||||
</li><li> paraphrasing
|
|
||||||
</li><li> random and exhaustive generation
|
|
||||||
</li><li> syntax editing
|
|
||||||
</li></ul>
|
|
||||||
GF particularly addresses four aspects of grammars:
|
|
||||||
<ul>
|
|
||||||
<li> multilinguality (parallel grammars for different languages)
|
|
||||||
</li><li> semantics (semantic conditions of well-formedness, semantic
|
|
||||||
properties of expressions)
|
|
||||||
<li> modularity and grammar engineering
|
|
||||||
<li> reuse of grammars in different formats and as software components
|
|
||||||
</ul>
|
|
||||||
GF provides an easy way to experiment with grammars written in
|
|
||||||
different formats, including the ubiquitous BNF and EBNF formats.
|
|
||||||
The <a href="doc/gf-compiler.png">GF compilation chart</a> gives a
|
|
||||||
summary of the supported input and output formats (the nodes in ellipses).
|
|
||||||
|
|
||||||
<br>
|
|
||||||
|
|
||||||
For instance, if you want to create a finite-state automaton
|
|
||||||
in the HTK SLF format (to use for speech recognition), all you have to do
|
|
||||||
is to write an EBNF grammar in a file <tt>foo.ebnf</tt> and type
|
|
||||||
<pre>
|
|
||||||
echo "pg -printer=slf" | gf foo.ebnf
|
|
||||||
</pre>
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<h2>License</h2>
|
|
||||||
|
|
||||||
GF is open-source software licensed under
|
|
||||||
<a href="LICENSE">GNU General Public License (GPL)</a>.
|
|
||||||
|
|
||||||
<p>
|
|
||||||
|
|
||||||
The <a href="lib">GF Grammar Libraries</a> are licensed under
|
|
||||||
<a href="lib/resource/LICENSE">GNU Lesser General Public License (LGPL)</a>.
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<h2>Examples and demos</h2>
|
|
||||||
|
|
||||||
<a href="http://www.cs.chalmers.se/~bringert/gf/translate/">Numeral
|
|
||||||
translator</a>: recognizes and generates
|
|
||||||
numbers from 1 to 999,999 in 80 languages.
|
|
||||||
(The link goes to a live applet, which requires
|
|
||||||
<a href="http://java.sun.com/j2se/1.5.0/download.jsp">Java 1.5 plugin</a>.
|
|
||||||
Here is an <a href="doc/2341.html">example</a>, which does
|
|
||||||
not require the plugin.)
|
|
||||||
|
|
||||||
<p>
|
|
||||||
|
|
||||||
<a href="http://www.cs.chalmers.se/%7Ekrijo/gramlets/letter-applet.html">Letter
|
|
||||||
editor</a>:
|
|
||||||
write simple letters in English, Finnish,
|
|
||||||
French, Swedish, and Russian with a few mouse clicks.
|
|
||||||
|
|
||||||
<p>
|
|
||||||
|
|
||||||
<a
|
|
||||||
href="http://www.cs.chalmers.se/~bringert/misc/tramdemo.avi">Demo film</a>
|
|
||||||
of a multimodal dialogue system built with embedded grammars.
|
|
||||||
|
|
||||||
<p>
|
|
||||||
|
|
||||||
<a href="examples/tutorial/">Example grammars</a> used in the
|
|
||||||
<a href="doc/gf-tutorial.html">GF tutorial</a>.
|
|
||||||
|
|
||||||
<p>
|
|
||||||
|
|
||||||
<a href="lib/resource/doc/index.html">Resource grammar library</a>:
|
|
||||||
basic structures of ten languages
|
|
||||||
(Danish, English, Finnish, French, German,
|
|
||||||
Italian, Norwegian, Russian, Spanish, Swedish).
|
|
||||||
Resource grammars can be used as libraries for writing GF
|
|
||||||
applications,
|
|
||||||
but they can also be useful for language training.
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<h2>Executable programs</h2>
|
|
||||||
|
|
||||||
GF is available for
|
|
||||||
several platforms: Linux, Mac OS X, Microsoft Windows, and Sun OS.
|
|
||||||
To get GF, go to the
|
|
||||||
<a href="download">Download Page</a>.
|
|
||||||
|
|
||||||
|
|
||||||
<h2>Quick start</h2>
|
|
||||||
|
|
||||||
When you have downloaded and installed GF, you can try one of the
|
|
||||||
<a href="doc/gf-quickstart.html">quick start examples</a>.
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<h2>Source code</h2>
|
|
||||||
|
|
||||||
The main part of GF is written in
|
|
||||||
<a href="http://www.haskell.org/">Haskell</a>.
|
|
||||||
|
|
||||||
<p>
|
|
||||||
|
|
||||||
The platform-independent graphical user interface is written in
|
|
||||||
<a href="http://java.sun.com/">Java</a>.
|
|
||||||
|
|
||||||
|
|
||||||
</p><p>
|
|
||||||
|
|
||||||
The <a href="download/">Download Page</a>
|
|
||||||
gives links to source and binary packages, as well as
|
|
||||||
information on compiler requirements.
|
|
||||||
|
|
||||||
<p>
|
|
||||||
|
|
||||||
The publicly accessible
|
|
||||||
<a href="http://www.cs.chalmers.se/Cs/Research/Language-technology/darcs/GF/doc/darcs.html">
|
|
||||||
Darcs repository</a>
|
|
||||||
has the latest sources and documents.
|
|
||||||
|
|
||||||
<p>
|
|
||||||
|
|
||||||
For Java programmers: GF grammars can be embedded in Java programs by using the
|
|
||||||
<a href="http://www.cs.chalmers.se/~bringert/gf/gf-java.html">
|
|
||||||
Embedded GF Interpreter</a>.
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
</p><h2>Documents</h2>
|
|
||||||
|
|
||||||
|
|
||||||
See the <a href="doc/index.html">Documentation page</a>.
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<h2>Projects and events</h2>
|
|
||||||
|
|
||||||
<li> <a href="http://webalt.math.helsinki.fi/content/index_eng.html">WebALT</a>,
|
|
||||||
Web Advanced Learning Technologies. GF is used for generating multilingual
|
|
||||||
teaching material in mathematics.
|
|
||||||
|
|
||||||
<li> <a href="http://www.talk-project.org">TALK</a> = Tools for Ambient Linguistic
|
|
||||||
Knowledge. GF was used in implementing multimodal and multilingual dialogue systems.
|
|
||||||
|
|
||||||
<li> <a href="http://www.key-project.org/">KeY</a> project on Integrated Deductive
|
|
||||||
Software Design. GF was used for
|
|
||||||
authoring informal and formal specifications. More details on the GF
|
|
||||||
application
|
|
||||||
<a href="http://www.cs.chalmers.se/%7Ekrijo/gfspec">
|
|
||||||
here</a>.
|
|
||||||
|
|
||||||
<li>
|
|
||||||
Project <a href="http://efficient.citi.tudor.lu/index_noframe.html">Efficient</a>
|
|
||||||
at Tudor Institute, Luxembourg, "atelier de prototypage de transactions d'e-commerce".
|
|
||||||
GF is used as an authoring tool for business models.
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<h2>Miscellaneous</h2>
|
|
||||||
|
|
||||||
|
|
||||||
</li><li>
|
|
||||||
<a href="doc/gfcc.pdf">
|
|
||||||
GFCC</a>:
|
|
||||||
report on a compiler from a fragment of C to JVM, written in GF.
|
|
||||||
The compiler source code can be found in the directory
|
|
||||||
<tt>examples/gfcc</tt> in the GF grammar library
|
|
||||||
(see <a href="http://sourceforge.net/project/showfiles.php?group_id=132285">GF download page</a>).
|
|
||||||
|
|
||||||
</li><li>
|
|
||||||
The original <a href="http://www.xrce.xerox.com/">
|
|
||||||
GF Xerox Home Page</a>
|
|
||||||
with the oldest releases of and documents on GF, up to Version 0.54, 1999,
|
|
||||||
does not seem to exist any more.
|
|
||||||
|
|
||||||
|
|
||||||
</li><li>
|
|
||||||
Earlier application:
|
|
||||||
<a href="http://www.cs.chalmers.se/%7Ehallgren/Alfa/Tutorial/GFplugin.html">
|
|
||||||
Natural-Language Interface to the proof editor Alfa</a>.
|
|
||||||
|
|
||||||
</li><li>
|
|
||||||
<a href="http://www.cs.chalmers.se/%7Emarkus/BNFC">The BNF Converter</a>.
|
|
||||||
A GF spin-off customized for the description of programming
|
|
||||||
languages.
|
|
||||||
|
|
||||||
</li><li>
|
|
||||||
|
|
||||||
<a href="http://www.cs.chalmers.se/%7Emarkus/FM">The Functional
|
|
||||||
Morphology project</a>. Creating infrastructure for GF and other
|
|
||||||
linguistic applications.
|
|
||||||
|
|
||||||
|
|
||||||
</li></ul>
|
|
||||||
|
|
||||||
<h2>Authors</h2>
|
|
||||||
|
|
||||||
The <a href="http://www.cs.chalmers.se/Cs/Research/Language-technology/">
|
|
||||||
Language Technology Group</a>.
|
|
||||||
More details on the
|
|
||||||
<a href="http://www.cs.chalmers.se/%7Eaarne/GF/doc/gf-people.html">
|
|
||||||
Authors and Acknowledgements</a> page.
|
|
||||||
|
|
||||||
|
|
||||||
<h2>Implementation project</h2>
|
|
||||||
|
|
||||||
Want to become a GF developer? Contact
|
|
||||||
<a href="http://www.cs.chalmers.se/%7Eaarne/">Aarne Ranta</a>.
|
|
||||||
Or just get the sources and start hacking.
|
|
||||||
|
|
||||||
<p>
|
|
||||||
|
|
||||||
And register to the
|
|
||||||
<a href="https://lists.sourceforge.net/lists/listinfo/gf-tools-users">GF User's Mailing List</a>!
|
|
||||||
|
|
||||||
<hr>
|
|
||||||
|
|
||||||
Last modified by
|
|
||||||
<a href="http://www.cs.chalmers.se/%7Eaarne">
|
|
||||||
Aarne Ranta</a>,
|
|
||||||
December 21, 2007.
|
|
||||||
|
|
||||||
</body></html>
|
|
||||||
@@ -1,251 +0,0 @@
|
|||||||
#!/bin/sh
|
|
||||||
#
|
|
||||||
# install - install a program, script, or datafile
|
|
||||||
# This comes from X11R5 (mit/util/scripts/install.sh).
|
|
||||||
#
|
|
||||||
# Copyright 1991 by the Massachusetts Institute of Technology
|
|
||||||
#
|
|
||||||
# Permission to use, copy, modify, distribute, and sell this software and its
|
|
||||||
# documentation for any purpose is hereby granted without fee, provided that
|
|
||||||
# the above copyright notice appear in all copies and that both that
|
|
||||||
# copyright notice and this permission notice appear in supporting
|
|
||||||
# documentation, and that the name of M.I.T. not be used in advertising or
|
|
||||||
# publicity pertaining to distribution of the software without specific,
|
|
||||||
# written prior permission. M.I.T. makes no representations about the
|
|
||||||
# suitability of this software for any purpose. It is provided "as is"
|
|
||||||
# without express or implied warranty.
|
|
||||||
#
|
|
||||||
# Calling this script install-sh is preferred over install.sh, to prevent
|
|
||||||
# `make' implicit rules from creating a file called install from it
|
|
||||||
# when there is no Makefile.
|
|
||||||
#
|
|
||||||
# This script is compatible with the BSD install script, but was written
|
|
||||||
# from scratch. It can only install one file at a time, a restriction
|
|
||||||
# shared with many OS's install programs.
|
|
||||||
|
|
||||||
|
|
||||||
# set DOITPROG to echo to test this script
|
|
||||||
|
|
||||||
# Don't use :- since 4.3BSD and earlier shells don't like it.
|
|
||||||
doit="${DOITPROG-}"
|
|
||||||
|
|
||||||
|
|
||||||
# put in absolute paths if you don't have them in your path; or use env. vars.
|
|
||||||
|
|
||||||
mvprog="${MVPROG-mv}"
|
|
||||||
cpprog="${CPPROG-cp}"
|
|
||||||
chmodprog="${CHMODPROG-chmod}"
|
|
||||||
chownprog="${CHOWNPROG-chown}"
|
|
||||||
chgrpprog="${CHGRPPROG-chgrp}"
|
|
||||||
stripprog="${STRIPPROG-strip}"
|
|
||||||
rmprog="${RMPROG-rm}"
|
|
||||||
mkdirprog="${MKDIRPROG-mkdir}"
|
|
||||||
|
|
||||||
transformbasename=""
|
|
||||||
transform_arg=""
|
|
||||||
instcmd="$mvprog"
|
|
||||||
chmodcmd="$chmodprog 0755"
|
|
||||||
chowncmd=""
|
|
||||||
chgrpcmd=""
|
|
||||||
stripcmd=""
|
|
||||||
rmcmd="$rmprog -f"
|
|
||||||
mvcmd="$mvprog"
|
|
||||||
src=""
|
|
||||||
dst=""
|
|
||||||
dir_arg=""
|
|
||||||
|
|
||||||
while [ x"$1" != x ]; do
|
|
||||||
case $1 in
|
|
||||||
-c) instcmd="$cpprog"
|
|
||||||
shift
|
|
||||||
continue;;
|
|
||||||
|
|
||||||
-d) dir_arg=true
|
|
||||||
shift
|
|
||||||
continue;;
|
|
||||||
|
|
||||||
-m) chmodcmd="$chmodprog $2"
|
|
||||||
shift
|
|
||||||
shift
|
|
||||||
continue;;
|
|
||||||
|
|
||||||
-o) chowncmd="$chownprog $2"
|
|
||||||
shift
|
|
||||||
shift
|
|
||||||
continue;;
|
|
||||||
|
|
||||||
-g) chgrpcmd="$chgrpprog $2"
|
|
||||||
shift
|
|
||||||
shift
|
|
||||||
continue;;
|
|
||||||
|
|
||||||
-s) stripcmd="$stripprog"
|
|
||||||
shift
|
|
||||||
continue;;
|
|
||||||
|
|
||||||
-t=*) transformarg=`echo $1 | sed 's/-t=//'`
|
|
||||||
shift
|
|
||||||
continue;;
|
|
||||||
|
|
||||||
-b=*) transformbasename=`echo $1 | sed 's/-b=//'`
|
|
||||||
shift
|
|
||||||
continue;;
|
|
||||||
|
|
||||||
*) if [ x"$src" = x ]
|
|
||||||
then
|
|
||||||
src=$1
|
|
||||||
else
|
|
||||||
# this colon is to work around a 386BSD /bin/sh bug
|
|
||||||
:
|
|
||||||
dst=$1
|
|
||||||
fi
|
|
||||||
shift
|
|
||||||
continue;;
|
|
||||||
esac
|
|
||||||
done
|
|
||||||
|
|
||||||
if [ x"$src" = x ]
|
|
||||||
then
|
|
||||||
echo "install: no input file specified"
|
|
||||||
exit 1
|
|
||||||
else
|
|
||||||
true
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [ x"$dir_arg" != x ]; then
|
|
||||||
dst=$src
|
|
||||||
src=""
|
|
||||||
|
|
||||||
if [ -d $dst ]; then
|
|
||||||
instcmd=:
|
|
||||||
chmodcmd=""
|
|
||||||
else
|
|
||||||
instcmd=mkdir
|
|
||||||
fi
|
|
||||||
else
|
|
||||||
|
|
||||||
# Waiting for this to be detected by the "$instcmd $src $dsttmp" command
|
|
||||||
# might cause directories to be created, which would be especially bad
|
|
||||||
# if $src (and thus $dsttmp) contains '*'.
|
|
||||||
|
|
||||||
if [ -f $src -o -d $src ]
|
|
||||||
then
|
|
||||||
true
|
|
||||||
else
|
|
||||||
echo "install: $src does not exist"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [ x"$dst" = x ]
|
|
||||||
then
|
|
||||||
echo "install: no destination specified"
|
|
||||||
exit 1
|
|
||||||
else
|
|
||||||
true
|
|
||||||
fi
|
|
||||||
|
|
||||||
# If destination is a directory, append the input filename; if your system
|
|
||||||
# does not like double slashes in filenames, you may need to add some logic
|
|
||||||
|
|
||||||
if [ -d $dst ]
|
|
||||||
then
|
|
||||||
dst="$dst"/`basename $src`
|
|
||||||
else
|
|
||||||
true
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
|
|
||||||
## this sed command emulates the dirname command
|
|
||||||
dstdir=`echo $dst | sed -e 's,[^/]*$,,;s,/$,,;s,^$,.,'`
|
|
||||||
|
|
||||||
# Make sure that the destination directory exists.
|
|
||||||
# this part is taken from Noah Friedman's mkinstalldirs script
|
|
||||||
|
|
||||||
# Skip lots of stat calls in the usual case.
|
|
||||||
if [ ! -d "$dstdir" ]; then
|
|
||||||
defaultIFS='
|
|
||||||
'
|
|
||||||
IFS="${IFS-${defaultIFS}}"
|
|
||||||
|
|
||||||
oIFS="${IFS}"
|
|
||||||
# Some sh's can't handle IFS=/ for some reason.
|
|
||||||
IFS='%'
|
|
||||||
set - `echo ${dstdir} | sed -e 's@/@%@g' -e 's@^%@/@'`
|
|
||||||
IFS="${oIFS}"
|
|
||||||
|
|
||||||
pathcomp=''
|
|
||||||
|
|
||||||
while [ $# -ne 0 ] ; do
|
|
||||||
pathcomp="${pathcomp}${1}"
|
|
||||||
shift
|
|
||||||
|
|
||||||
if [ ! -d "${pathcomp}" ] ;
|
|
||||||
then
|
|
||||||
$mkdirprog "${pathcomp}"
|
|
||||||
else
|
|
||||||
true
|
|
||||||
fi
|
|
||||||
|
|
||||||
pathcomp="${pathcomp}/"
|
|
||||||
done
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [ x"$dir_arg" != x ]
|
|
||||||
then
|
|
||||||
$doit $instcmd $dst &&
|
|
||||||
|
|
||||||
if [ x"$chowncmd" != x ]; then $doit $chowncmd $dst; else true ; fi &&
|
|
||||||
if [ x"$chgrpcmd" != x ]; then $doit $chgrpcmd $dst; else true ; fi &&
|
|
||||||
if [ x"$stripcmd" != x ]; then $doit $stripcmd $dst; else true ; fi &&
|
|
||||||
if [ x"$chmodcmd" != x ]; then $doit $chmodcmd $dst; else true ; fi
|
|
||||||
else
|
|
||||||
|
|
||||||
# If we're going to rename the final executable, determine the name now.
|
|
||||||
|
|
||||||
if [ x"$transformarg" = x ]
|
|
||||||
then
|
|
||||||
dstfile=`basename $dst`
|
|
||||||
else
|
|
||||||
dstfile=`basename $dst $transformbasename |
|
|
||||||
sed $transformarg`$transformbasename
|
|
||||||
fi
|
|
||||||
|
|
||||||
# don't allow the sed command to completely eliminate the filename
|
|
||||||
|
|
||||||
if [ x"$dstfile" = x ]
|
|
||||||
then
|
|
||||||
dstfile=`basename $dst`
|
|
||||||
else
|
|
||||||
true
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Make a temp file name in the proper directory.
|
|
||||||
|
|
||||||
dsttmp=$dstdir/#inst.$$#
|
|
||||||
|
|
||||||
# Move or copy the file name to the temp name
|
|
||||||
|
|
||||||
$doit $instcmd $src $dsttmp &&
|
|
||||||
|
|
||||||
trap "rm -f ${dsttmp}" 0 &&
|
|
||||||
|
|
||||||
# and set any options; do chmod last to preserve setuid bits
|
|
||||||
|
|
||||||
# If any of these fail, we abort the whole thing. If we want to
|
|
||||||
# ignore errors from any of these, just make sure not to ignore
|
|
||||||
# errors from the above "$doit $instcmd $src $dsttmp" command.
|
|
||||||
|
|
||||||
if [ x"$chowncmd" != x ]; then $doit $chowncmd $dsttmp; else true;fi &&
|
|
||||||
if [ x"$chgrpcmd" != x ]; then $doit $chgrpcmd $dsttmp; else true;fi &&
|
|
||||||
if [ x"$stripcmd" != x ]; then $doit $stripcmd $dsttmp; else true;fi &&
|
|
||||||
if [ x"$chmodcmd" != x ]; then $doit $chmodcmd $dsttmp; else true;fi &&
|
|
||||||
|
|
||||||
# Now rename the file to the real destination.
|
|
||||||
|
|
||||||
$doit $rmcmd -f $dstdir/$dstfile &&
|
|
||||||
$doit $mvcmd $dsttmp $dstdir/$dstfile
|
|
||||||
|
|
||||||
fi &&
|
|
||||||
|
|
||||||
|
|
||||||
exit 0
|
|
||||||
@@ -1,8 +0,0 @@
|
|||||||
#! /bin/sh
|
|
||||||
|
|
||||||
JGUILIB=$GFHOME/src/JavaGUI
|
|
||||||
GF=$GFHOME/bin/gf
|
|
||||||
JGUI=GFEditor2
|
|
||||||
|
|
||||||
java -cp $JGUILIB $JGUI "$GF -java $*"
|
|
||||||
|
|
||||||
@@ -1,165 +0,0 @@
|
|||||||
GNU LESSER GENERAL PUBLIC LICENSE
|
|
||||||
Version 3, 29 June 2007
|
|
||||||
|
|
||||||
Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
|
|
||||||
Everyone is permitted to copy and distribute verbatim copies
|
|
||||||
of this license document, but changing it is not allowed.
|
|
||||||
|
|
||||||
|
|
||||||
This version of the GNU Lesser General Public License incorporates
|
|
||||||
the terms and conditions of version 3 of the GNU General Public
|
|
||||||
License, supplemented by the additional permissions listed below.
|
|
||||||
|
|
||||||
0. Additional Definitions.
|
|
||||||
|
|
||||||
As used herein, "this License" refers to version 3 of the GNU Lesser
|
|
||||||
General Public License, and the "GNU GPL" refers to version 3 of the GNU
|
|
||||||
General Public License.
|
|
||||||
|
|
||||||
"The Library" refers to a covered work governed by this License,
|
|
||||||
other than an Application or a Combined Work as defined below.
|
|
||||||
|
|
||||||
An "Application" is any work that makes use of an interface provided
|
|
||||||
by the Library, but which is not otherwise based on the Library.
|
|
||||||
Defining a subclass of a class defined by the Library is deemed a mode
|
|
||||||
of using an interface provided by the Library.
|
|
||||||
|
|
||||||
A "Combined Work" is a work produced by combining or linking an
|
|
||||||
Application with the Library. The particular version of the Library
|
|
||||||
with which the Combined Work was made is also called the "Linked
|
|
||||||
Version".
|
|
||||||
|
|
||||||
The "Minimal Corresponding Source" for a Combined Work means the
|
|
||||||
Corresponding Source for the Combined Work, excluding any source code
|
|
||||||
for portions of the Combined Work that, considered in isolation, are
|
|
||||||
based on the Application, and not on the Linked Version.
|
|
||||||
|
|
||||||
The "Corresponding Application Code" for a Combined Work means the
|
|
||||||
object code and/or source code for the Application, including any data
|
|
||||||
and utility programs needed for reproducing the Combined Work from the
|
|
||||||
Application, but excluding the System Libraries of the Combined Work.
|
|
||||||
|
|
||||||
1. Exception to Section 3 of the GNU GPL.
|
|
||||||
|
|
||||||
You may convey a covered work under sections 3 and 4 of this License
|
|
||||||
without being bound by section 3 of the GNU GPL.
|
|
||||||
|
|
||||||
2. Conveying Modified Versions.
|
|
||||||
|
|
||||||
If you modify a copy of the Library, and, in your modifications, a
|
|
||||||
facility refers to a function or data to be supplied by an Application
|
|
||||||
that uses the facility (other than as an argument passed when the
|
|
||||||
facility is invoked), then you may convey a copy of the modified
|
|
||||||
version:
|
|
||||||
|
|
||||||
a) under this License, provided that you make a good faith effort to
|
|
||||||
ensure that, in the event an Application does not supply the
|
|
||||||
function or data, the facility still operates, and performs
|
|
||||||
whatever part of its purpose remains meaningful, or
|
|
||||||
|
|
||||||
b) under the GNU GPL, with none of the additional permissions of
|
|
||||||
this License applicable to that copy.
|
|
||||||
|
|
||||||
3. Object Code Incorporating Material from Library Header Files.
|
|
||||||
|
|
||||||
The object code form of an Application may incorporate material from
|
|
||||||
a header file that is part of the Library. You may convey such object
|
|
||||||
code under terms of your choice, provided that, if the incorporated
|
|
||||||
material is not limited to numerical parameters, data structure
|
|
||||||
layouts and accessors, or small macros, inline functions and templates
|
|
||||||
(ten or fewer lines in length), you do both of the following:
|
|
||||||
|
|
||||||
a) Give prominent notice with each copy of the object code that the
|
|
||||||
Library is used in it and that the Library and its use are
|
|
||||||
covered by this License.
|
|
||||||
|
|
||||||
b) Accompany the object code with a copy of the GNU GPL and this license
|
|
||||||
document.
|
|
||||||
|
|
||||||
4. Combined Works.
|
|
||||||
|
|
||||||
You may convey a Combined Work under terms of your choice that,
|
|
||||||
taken together, effectively do not restrict modification of the
|
|
||||||
portions of the Library contained in the Combined Work and reverse
|
|
||||||
engineering for debugging such modifications, if you also do each of
|
|
||||||
the following:
|
|
||||||
|
|
||||||
a) Give prominent notice with each copy of the Combined Work that
|
|
||||||
the Library is used in it and that the Library and its use are
|
|
||||||
covered by this License.
|
|
||||||
|
|
||||||
b) Accompany the Combined Work with a copy of the GNU GPL and this license
|
|
||||||
document.
|
|
||||||
|
|
||||||
c) For a Combined Work that displays copyright notices during
|
|
||||||
execution, include the copyright notice for the Library among
|
|
||||||
these notices, as well as a reference directing the user to the
|
|
||||||
copies of the GNU GPL and this license document.
|
|
||||||
|
|
||||||
d) Do one of the following:
|
|
||||||
|
|
||||||
0) Convey the Minimal Corresponding Source under the terms of this
|
|
||||||
License, and the Corresponding Application Code in a form
|
|
||||||
suitable for, and under terms that permit, the user to
|
|
||||||
recombine or relink the Application with a modified version of
|
|
||||||
the Linked Version to produce a modified Combined Work, in the
|
|
||||||
manner specified by section 6 of the GNU GPL for conveying
|
|
||||||
Corresponding Source.
|
|
||||||
|
|
||||||
1) Use a suitable shared library mechanism for linking with the
|
|
||||||
Library. A suitable mechanism is one that (a) uses at run time
|
|
||||||
a copy of the Library already present on the user's computer
|
|
||||||
system, and (b) will operate properly with a modified version
|
|
||||||
of the Library that is interface-compatible with the Linked
|
|
||||||
Version.
|
|
||||||
|
|
||||||
e) Provide Installation Information, but only if you would otherwise
|
|
||||||
be required to provide such information under section 6 of the
|
|
||||||
GNU GPL, and only to the extent that such information is
|
|
||||||
necessary to install and execute a modified version of the
|
|
||||||
Combined Work produced by recombining or relinking the
|
|
||||||
Application with a modified version of the Linked Version. (If
|
|
||||||
you use option 4d0, the Installation Information must accompany
|
|
||||||
the Minimal Corresponding Source and Corresponding Application
|
|
||||||
Code. If you use option 4d1, you must provide the Installation
|
|
||||||
Information in the manner specified by section 6 of the GNU GPL
|
|
||||||
for conveying Corresponding Source.)
|
|
||||||
|
|
||||||
5. Combined Libraries.
|
|
||||||
|
|
||||||
You may place library facilities that are a work based on the
|
|
||||||
Library side by side in a single library together with other library
|
|
||||||
facilities that are not Applications and are not covered by this
|
|
||||||
License, and convey such a combined library under terms of your
|
|
||||||
choice, if you do both of the following:
|
|
||||||
|
|
||||||
a) Accompany the combined library with a copy of the same work based
|
|
||||||
on the Library, uncombined with any other library facilities,
|
|
||||||
conveyed under the terms of this License.
|
|
||||||
|
|
||||||
b) Give prominent notice with the combined library that part of it
|
|
||||||
is a work based on the Library, and explaining where to find the
|
|
||||||
accompanying uncombined form of the same work.
|
|
||||||
|
|
||||||
6. Revised Versions of the GNU Lesser General Public License.
|
|
||||||
|
|
||||||
The Free Software Foundation may publish revised and/or new versions
|
|
||||||
of the GNU Lesser General Public License from time to time. Such new
|
|
||||||
versions will be similar in spirit to the present version, but may
|
|
||||||
differ in detail to address new problems or concerns.
|
|
||||||
|
|
||||||
Each version is given a distinguishing version number. If the
|
|
||||||
Library as you received it specifies that a certain numbered version
|
|
||||||
of the GNU Lesser General Public License "or any later version"
|
|
||||||
applies to it, you have the option of following the terms and
|
|
||||||
conditions either of that published version or of any later version
|
|
||||||
published by the Free Software Foundation. If the Library as you
|
|
||||||
received it does not specify a version number of the GNU Lesser
|
|
||||||
General Public License, you may choose any version of the GNU Lesser
|
|
||||||
General Public License ever published by the Free Software Foundation.
|
|
||||||
|
|
||||||
If the Library as you received it specifies that a proxy can decide
|
|
||||||
whether future versions of the GNU Lesser General Public License shall
|
|
||||||
apply, that proxy's public statement of acceptance of any version is
|
|
||||||
permanent authorization for you to choose that version for the
|
|
||||||
Library.
|
|
||||||
@@ -1,60 +0,0 @@
|
|||||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
|
||||||
<HTML>
|
|
||||||
<HEAD>
|
|
||||||
<META NAME="generator" CONTENT="http://txt2tags.sf.net">
|
|
||||||
<TITLE>GF Grammar Libraries</TITLE>
|
|
||||||
</HEAD><BODY BGCOLOR="white" TEXT="black">
|
|
||||||
<P ALIGN="center"><CENTER><H1>GF Grammar Libraries</H1>
|
|
||||||
<FONT SIZE="4">
|
|
||||||
<I>Author: Aarne Ranta <aarne (at) cs.chalmers.se></I><BR>
|
|
||||||
Last update: Fri Dec 22 15:19:46 2006
|
|
||||||
</FONT></CENTER>
|
|
||||||
|
|
||||||
<P>
|
|
||||||
One of the main ideas of
|
|
||||||
<A HREF="..">GF</A>
|
|
||||||
is the use of libraries in grammar writing, in a way familiar
|
|
||||||
from software engineering. In this way, large grammars can
|
|
||||||
be built in cooperation, and old grammars or parts of them
|
|
||||||
can be reused in new grammars. The slides
|
|
||||||
<A HREF="resource-1.0/doc/gslt-sem-2006.html">Grammars as Software Libraries</A>
|
|
||||||
give some introduction to this idea.
|
|
||||||
</P>
|
|
||||||
<H2>The resource grammar library</H2>
|
|
||||||
<P>
|
|
||||||
This library covers basic linguistic structures of
|
|
||||||
different languages.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
<A HREF="resource-1.0/doc">Version 1.1</A> released 22 December 2006
|
|
||||||
(enhanced version of 1.0).
|
|
||||||
Covers Danish, English, Finnish, French, German, Italian, Norwegian,
|
|
||||||
Russian, Spanish, and Swedish.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
Two older versions are also available:
|
|
||||||
<A HREF="resource/">Version 0.9</A>
|
|
||||||
and
|
|
||||||
<A HREF="resource-0.6/">Version 0.6</A>.
|
|
||||||
</P>
|
|
||||||
<H2>The prelude library</H2>
|
|
||||||
<P>
|
|
||||||
The <A HREF="prelude/">prelude</A>
|
|
||||||
library gives utility functions for different GF applications:
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
<A HREF="resource-1.0/doc/gfdoc/Precedence.html">Precedence</A>. Utilities for
|
|
||||||
formal languages: precedence levels, associatives, infixes.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
<A HREF="resource-1.0/doc/gfdoc/Predef.html">Predef</A>. Type signatures
|
|
||||||
of predefined (hard-coded) functions.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
<A HREF="resource-1.0/doc/gfdoc/Prelude.html">Prelude</A>. Generic utilities
|
|
||||||
for strings, tables, records, booleans.
|
|
||||||
</P>
|
|
||||||
|
|
||||||
<!-- html code generated by txt2tags 2.3 (http://txt2tags.sf.net) -->
|
|
||||||
<!-- cmdline: txt2tags index.txt -->
|
|
||||||
</BODY></HTML>
|
|
||||||
@@ -1,58 +0,0 @@
|
|||||||
GF Grammar Libraries
|
|
||||||
Author: Aarne Ranta <aarne (at) cs.chalmers.se>
|
|
||||||
Last update: %%date(%c)
|
|
||||||
|
|
||||||
% NOTE: this is a txt2tags file.
|
|
||||||
% Create an html file from this file using:
|
|
||||||
% txt2tags --toc -thtml index.txt
|
|
||||||
|
|
||||||
%!target:html
|
|
||||||
|
|
||||||
One of the main ideas of
|
|
||||||
[GF ..]
|
|
||||||
is the use of libraries in grammar writing, in a way familiar
|
|
||||||
from software engineering. In this way, large grammars can
|
|
||||||
be built in cooperation, and old grammars or parts of them
|
|
||||||
can be reused in new grammars. The slides
|
|
||||||
[Grammars as Software Libraries resource-1.0/doc/gslt-sem-2006.html]
|
|
||||||
give some introduction to this idea.
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
==The resource grammar library==
|
|
||||||
|
|
||||||
This library covers basic linguistic structures of
|
|
||||||
different languages.
|
|
||||||
|
|
||||||
[Version 1.2 resource-1.0/doc] released 22 December 2006
|
|
||||||
(enhanced version of 1.0).
|
|
||||||
Covers Danish, English, Finnish, French, German, Italian, Norwegian,
|
|
||||||
Russian, Spanish, and Swedish, and to a smaller extent Arabic and Catalan.
|
|
||||||
|
|
||||||
Two older versions are also available:
|
|
||||||
[Version 0.9 resource/]
|
|
||||||
and
|
|
||||||
[Version 0.6 resource-0.6/].
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
==The prelude library==
|
|
||||||
|
|
||||||
The [prelude prelude/]
|
|
||||||
library gives utility functions for different GF applications:
|
|
||||||
|
|
||||||
[Precedence resource-1.0/doc/gfdoc/Precedence.html]. Utilities for
|
|
||||||
formal languages: precedence levels, associatives, infixes.
|
|
||||||
|
|
||||||
[Predef resource-1.0/doc/gfdoc/Predef.html]. Type signatures
|
|
||||||
of predefined (hard-coded) functions.
|
|
||||||
|
|
||||||
[Prelude resource-1.0/doc/gfdoc/Prelude.html]. Generic utilities
|
|
||||||
for strings, tables, records, booleans.
|
|
||||||
|
|
||||||
|
|
||||||
==License==
|
|
||||||
|
|
||||||
All libraries in this directory and its subdirectories are
|
|
||||||
released under the GNU Lesser General Public License (LGPL). See the file
|
|
||||||
[LICENSE ./LICENSE] for more details.
|
|
||||||
@@ -1,129 +0,0 @@
|
|||||||
resource Coordination = open Prelude in {
|
|
||||||
|
|
||||||
param
|
|
||||||
ListSize = TwoElem | ManyElem ;
|
|
||||||
|
|
||||||
oper
|
|
||||||
ListX = {s1,s2 : Str} ;
|
|
||||||
|
|
||||||
twoStr : (x,y : Str) -> ListX = \x,y ->
|
|
||||||
{s1 = x ; s2 = y} ;
|
|
||||||
consStr : Str -> ListX -> Str -> ListX = \comma,xs,x ->
|
|
||||||
{s1 = xs.s1 ++ comma ++ xs.s2 ; s2 = x } ;
|
|
||||||
|
|
||||||
twoSS : (_,_ : SS) -> ListX = \x,y ->
|
|
||||||
twoStr x.s y.s ;
|
|
||||||
consSS : Str -> ListX -> SS -> ListX = \comma,xs,x ->
|
|
||||||
consStr comma xs x.s ;
|
|
||||||
|
|
||||||
Conjunction : Type = SS ;
|
|
||||||
ConjunctionDistr : Type = {s1 : Str ; s2 : Str} ;
|
|
||||||
|
|
||||||
conjunctX : Conjunction -> ListX -> Str = \or,xs ->
|
|
||||||
xs.s1 ++ or.s ++ xs.s2 ;
|
|
||||||
|
|
||||||
conjunctDistrX : ConjunctionDistr -> ListX -> Str = \or,xs ->
|
|
||||||
or.s1 ++ xs.s1 ++ or.s2 ++ xs.s2 ;
|
|
||||||
|
|
||||||
conjunctSS : Conjunction -> ListX -> SS = \or,xs ->
|
|
||||||
ss (xs.s1 ++ or.s ++ xs.s2) ;
|
|
||||||
|
|
||||||
conjunctDistrSS : ConjunctionDistr -> ListX -> SS = \or,xs ->
|
|
||||||
ss (or.s1 ++ xs.s1 ++ or.s2 ++ xs.s2) ;
|
|
||||||
|
|
||||||
-- all this lifted to tables
|
|
||||||
|
|
||||||
ListTable : Type -> Type = \P -> {s1,s2 : P => Str} ;
|
|
||||||
|
|
||||||
twoTable : (P : Type) -> (_,_ : {s : P => Str}) -> ListTable P = \_,x,y ->
|
|
||||||
{s1 = x.s ; s2 = y.s} ;
|
|
||||||
|
|
||||||
consTable : (P : Type) -> Str -> ListTable P -> {s : P => Str} -> ListTable P =
|
|
||||||
\P,c,xs,x ->
|
|
||||||
{s1 = table P {o => xs.s1 ! o ++ c ++ xs.s2 ! o} ; s2 = x.s} ;
|
|
||||||
|
|
||||||
conjunctTable : (P : Type) -> Conjunction -> ListTable P -> {s : P => Str} =
|
|
||||||
\P,or,xs ->
|
|
||||||
{s = table P {p => xs.s1 ! p ++ or.s ++ xs.s2 ! p}} ;
|
|
||||||
|
|
||||||
conjunctDistrTable :
|
|
||||||
(P : Type) -> ConjunctionDistr -> ListTable P -> {s : P => Str} = \P,or,xs ->
|
|
||||||
{s = table P {p => or.s1++ xs.s1 ! p ++ or.s2 ++ xs.s2 ! p}} ;
|
|
||||||
|
|
||||||
-- ... and to two- and three-argument tables: how clumsy! ---
|
|
||||||
|
|
||||||
ListTable2 : Type -> Type -> Type = \P,Q ->
|
|
||||||
{s1,s2 : P => Q => Str} ;
|
|
||||||
|
|
||||||
twoTable2 : (P,Q : Type) -> (_,_ : {s : P => Q => Str}) -> ListTable2 P Q =
|
|
||||||
\_,_,x,y ->
|
|
||||||
{s1 = x.s ; s2 = y.s} ;
|
|
||||||
|
|
||||||
consTable2 :
|
|
||||||
(P,Q : Type) -> Str -> ListTable2 P Q -> {s : P => Q => Str} -> ListTable2 P Q =
|
|
||||||
\P,Q,c,xs,x ->
|
|
||||||
{s1 = table P {p => table Q {q => xs.s1 ! p ! q ++ c ++ xs.s2 ! p! q}} ;
|
|
||||||
s2 = x.s
|
|
||||||
} ;
|
|
||||||
|
|
||||||
conjunctTable2 :
|
|
||||||
(P,Q : Type) -> Conjunction -> ListTable2 P Q -> {s : P => Q => Str} =
|
|
||||||
\P,Q,or,xs ->
|
|
||||||
{s = table P {p => table Q {q => xs.s1 ! p ! q ++ or.s ++ xs.s2 ! p ! q}}} ;
|
|
||||||
|
|
||||||
conjunctDistrTable2 :
|
|
||||||
(P,Q : Type) -> ConjunctionDistr -> ListTable2 P Q -> {s : P => Q => Str} =
|
|
||||||
\P,Q,or,xs ->
|
|
||||||
{s =
|
|
||||||
table P {p => table Q {q => or.s1++ xs.s1 ! p ! q ++ or.s2 ++ xs.s2 ! p ! q}}} ;
|
|
||||||
|
|
||||||
ListTable3 : Type -> Type -> Type -> Type = \P,Q,R ->
|
|
||||||
{s1,s2 : P => Q => R => Str} ;
|
|
||||||
|
|
||||||
twoTable3 : (P,Q,R : Type) -> (_,_ : {s : P => Q => R => Str}) ->
|
|
||||||
ListTable3 P Q R =
|
|
||||||
\_,_,_,x,y ->
|
|
||||||
{s1 = x.s ; s2 = y.s} ;
|
|
||||||
|
|
||||||
consTable3 :
|
|
||||||
(P,Q,R : Type) -> Str -> ListTable3 P Q R -> {s : P => Q => R => Str} ->
|
|
||||||
ListTable3 P Q R =
|
|
||||||
\P,Q,R,c,xs,x ->
|
|
||||||
{s1 = \\p,q,r => xs.s1 ! p ! q ! r ++ c ++ xs.s2 ! p ! q ! r ;
|
|
||||||
s2 = x.s
|
|
||||||
} ;
|
|
||||||
|
|
||||||
conjunctTable3 :
|
|
||||||
(P,Q,R : Type) -> Conjunction -> ListTable3 P Q R -> {s : P => Q => R => Str} =
|
|
||||||
\P,Q,R,or,xs ->
|
|
||||||
{s = \\p,q,r => xs.s1 ! p ! q ! r ++ or.s ++ xs.s2 ! p ! q ! r} ;
|
|
||||||
|
|
||||||
conjunctDistrTable3 :
|
|
||||||
(P,Q,R : Type) -> ConjunctionDistr -> ListTable3 P Q R ->
|
|
||||||
{s : P => Q => R => Str} =
|
|
||||||
\P,Q,R,or,xs ->
|
|
||||||
{s = \\p,q,r => or.s1++ xs.s1 ! p ! q ! r ++ or.s2 ++ xs.s2 ! p ! q ! r} ;
|
|
||||||
|
|
||||||
comma = "," ;
|
|
||||||
|
|
||||||
-- you can also do this to right-associative lists:
|
|
||||||
|
|
||||||
consrStr : Str -> Str -> ListX -> ListX = \comma,x,xs ->
|
|
||||||
{s1 = x ++ comma ++ xs.s1 ; s2 = xs.s2 } ;
|
|
||||||
|
|
||||||
consrSS : Str -> SS -> ListX -> ListX = \comma,x,xs ->
|
|
||||||
consrStr comma x.s xs ;
|
|
||||||
|
|
||||||
consrTable : (P : Type) -> Str -> {s : P => Str} -> ListTable P -> ListTable P =
|
|
||||||
\P,c,x,xs ->
|
|
||||||
{s1 = table P {o => x.s ! o ++ c ++ xs.s1 ! o} ; s2 = xs.s2} ;
|
|
||||||
|
|
||||||
consrTable2 : (P,Q : Type) -> Str -> {s : P => Q => Str} ->
|
|
||||||
ListTable2 P Q -> ListTable2 P Q =
|
|
||||||
\P,Q,c,x,xs ->
|
|
||||||
{s1 = table P {p => table Q {q => x.s ! p ! q ++ c ++ xs.s1 ! p ! q}} ;
|
|
||||||
s2 = xs.s2
|
|
||||||
} ;
|
|
||||||
|
|
||||||
|
|
||||||
} ;
|
|
||||||
@@ -1,54 +0,0 @@
|
|||||||
resource Formal = open Prelude in {
|
|
||||||
|
|
||||||
-- to replace the old library Precedence
|
|
||||||
|
|
||||||
oper
|
|
||||||
Prec : PType ;
|
|
||||||
TermPrec : Type = {s : Str ; p : Prec} ;
|
|
||||||
|
|
||||||
mkPrec : Prec -> Str -> TermPrec = \p,s ->
|
|
||||||
{s = s ; p = p} ;
|
|
||||||
|
|
||||||
top : TermPrec -> Str = usePrec 0 ;
|
|
||||||
|
|
||||||
constant : Str -> TermPrec = mkPrec highest ;
|
|
||||||
|
|
||||||
infixl : Prec -> Str -> (_,_ : TermPrec) -> TermPrec = \p,f,x,y ->
|
|
||||||
mkPrec p (usePrec p x ++ f ++ usePrec (nextPrec p) y) ;
|
|
||||||
infixr : Prec -> Str -> (_,_ : TermPrec) -> TermPrec = \p,f,x,y ->
|
|
||||||
mkPrec p (usePrec (nextPrec p) x ++ f ++ usePrec p y) ;
|
|
||||||
infixn : Prec -> Str -> (_,_ : TermPrec) -> TermPrec = \p,f,x,y ->
|
|
||||||
mkPrec p (usePrec (nextPrec p) x ++ f ++ usePrec (nextPrec p) y) ;
|
|
||||||
|
|
||||||
-- auxiliaries, should not be needed so much
|
|
||||||
|
|
||||||
usePrec : Prec -> TermPrec -> Str = \p,x ->
|
|
||||||
case lessPrec x.p p of {
|
|
||||||
True => parenth x.s ;
|
|
||||||
False => parenthOpt x.s
|
|
||||||
} ;
|
|
||||||
|
|
||||||
parenth : Str -> Str = \s -> "(" ++ s ++ ")" ;
|
|
||||||
parenthOpt : Str -> Str = \s -> variants {s ; "(" ++ s ++ ")"} ;
|
|
||||||
|
|
||||||
--.
|
|
||||||
-- low-level things: don't use
|
|
||||||
|
|
||||||
Prec : PType = Predef.Ints 4 ;
|
|
||||||
|
|
||||||
highest = 4 ;
|
|
||||||
|
|
||||||
lessPrec : Prec -> Prec -> Bool = \p,q ->
|
|
||||||
case <<p,q> : Prec * Prec> of {
|
|
||||||
<3,4> | <2,3> | <2,4> => True ;
|
|
||||||
<1,1> | <1,0> | <0,0> => False ;
|
|
||||||
<1,_> | <0,_> => True ;
|
|
||||||
_ => False
|
|
||||||
} ;
|
|
||||||
|
|
||||||
nextPrec : Prec -> Prec = \p -> case <p : Prec> of {
|
|
||||||
4 => 4 ;
|
|
||||||
n => Predef.plus n 1
|
|
||||||
} ;
|
|
||||||
|
|
||||||
}
|
|
||||||
@@ -1,8 +0,0 @@
|
|||||||
resource HTML = open Prelude in {
|
|
||||||
oper
|
|
||||||
tag : Str -> Str = \t -> "<" + t + ">" ;
|
|
||||||
endtag : Str -> Str = \t -> tag ("/" + t) ;
|
|
||||||
intag : Str -> Str -> Str = \t,s -> tag t ++ s ++ endtag t ;
|
|
||||||
intagAttr : Str -> Str -> Str -> Str =
|
|
||||||
\t,a,s -> ("<" + t) ++ (a + ">") ++ s ++ endtag t ;
|
|
||||||
}
|
|
||||||
@@ -1,12 +0,0 @@
|
|||||||
resource Latex = open Prelude in {
|
|
||||||
oper
|
|
||||||
command : Str -> Str = \c -> "\\" + c ;
|
|
||||||
fun1 : Str -> Str -> Str = \f,x -> "\\" + f + "{" ++ x ++ "}" ;
|
|
||||||
fun2 : Str -> Str -> Str -> Str =
|
|
||||||
\f,x,y -> "\\" + f + "{" ++ x ++ "}{" ++ y ++ "}" ;
|
|
||||||
begin : Str -> Str = \e -> "\\begin{" + e + "}" ;
|
|
||||||
end : Str -> Str = \e -> "\\end{" + e + "}" ;
|
|
||||||
inEnv : Str -> Str -> Str = \e,s -> begin e ++ s ++ end e ;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@@ -1,117 +0,0 @@
|
|||||||
-- operations for precedence-dependent strings.
|
|
||||||
-- five levels:
|
|
||||||
-- p4 (constants), p3 (applications), p2 (products), p1 (sums), p0 (arrows)
|
|
||||||
|
|
||||||
resource Precedence = open Prelude in {
|
|
||||||
|
|
||||||
param
|
|
||||||
Prec = p4 | p3 | p2 | p1 | p0 ;
|
|
||||||
|
|
||||||
oper
|
|
||||||
PrecTerm = Prec => Str ;
|
|
||||||
|
|
||||||
oper
|
|
||||||
pss : PrecTerm -> {s : PrecTerm} = \s -> {s = s} ;
|
|
||||||
|
|
||||||
|
|
||||||
-- change this if you want some other type of parentheses
|
|
||||||
mkParenth : Str -> Str = \str -> "(" ++ str ++ ")" ;
|
|
||||||
|
|
||||||
-- define ordering of precedences
|
|
||||||
nextPrec : Prec => Prec =
|
|
||||||
table {p0 => p1 ; p1 => p2 ; p2 => p3 ; _ => p4} ;
|
|
||||||
prevPrec : Prec => Prec =
|
|
||||||
table {p4 => p3 ; p3 => p2 ; p2 => p1 ; _ => p0} ;
|
|
||||||
|
|
||||||
mkPrec : Str -> Prec => Prec => Str = \str ->
|
|
||||||
table {
|
|
||||||
p4 => table { -- use the term of precedence p4...
|
|
||||||
_ => str} ; -- ...always without parentheses
|
|
||||||
p3 => table { -- use the term of precedence p3...
|
|
||||||
p4 => mkParenth str ; -- ...in parentheses if p4 is required...
|
|
||||||
_ => str} ; -- ...otherwise without parentheses
|
|
||||||
p2 => table {
|
|
||||||
p4 => mkParenth str ;
|
|
||||||
p3 => mkParenth str ;
|
|
||||||
_ => str} ;
|
|
||||||
p1 => table {
|
|
||||||
p1 => str ;
|
|
||||||
p0 => str ;
|
|
||||||
_ => mkParenth str} ;
|
|
||||||
p0 => table {
|
|
||||||
p0 => str ;
|
|
||||||
_ => mkParenth str}
|
|
||||||
} ;
|
|
||||||
|
|
||||||
-- make a string into a constant, of precedence p4
|
|
||||||
mkConst : Str -> PrecTerm =
|
|
||||||
\f ->
|
|
||||||
mkPrec f ! p4 ;
|
|
||||||
|
|
||||||
-- make a string into a 1/2/3 -place prefix operator, of precedence p3
|
|
||||||
mkFun1 : Str -> PrecTerm -> PrecTerm =
|
|
||||||
\f -> \x ->
|
|
||||||
table {k => mkPrec (f ++ x ! p4) ! p3 ! k} ;
|
|
||||||
mkFun2 : Str -> PrecTerm -> PrecTerm -> PrecTerm =
|
|
||||||
\f -> \x -> \y ->
|
|
||||||
table {k => mkPrec (f ++ x ! p4 ++ y ! p4) ! p3 ! k} ;
|
|
||||||
mkFun3 : Str -> PrecTerm -> PrecTerm -> PrecTerm -> PrecTerm =
|
|
||||||
\f -> \x -> \y -> \z ->
|
|
||||||
table {k => mkPrec (f ++ x ! p4 ++ y ! p4 ++ z ! p4) ! p3 ! k} ;
|
|
||||||
|
|
||||||
-- make a string into a non/left/right -associative infix operator, of precedence p
|
|
||||||
mkInfix : Str -> Prec -> PrecTerm -> PrecTerm -> PrecTerm =
|
|
||||||
\f -> \p -> \x -> \y ->
|
|
||||||
table {k => mkPrec (x ! (nextPrec ! p) ++ f ++ y ! (nextPrec ! p)) ! p ! k} ;
|
|
||||||
mkInfixL : Str -> Prec -> PrecTerm -> PrecTerm -> PrecTerm =
|
|
||||||
\f -> \p -> \x -> \y ->
|
|
||||||
table {k => mkPrec (x ! p ++ f ++ y ! (nextPrec ! p)) ! p ! k} ;
|
|
||||||
mkInfixR : Str -> Prec -> PrecTerm -> PrecTerm -> PrecTerm =
|
|
||||||
\f -> \p -> \x -> \y ->
|
|
||||||
table {k => mkPrec (x ! (nextPrec ! p) ++ f ++ y ! p) ! p ! k} ;
|
|
||||||
|
|
||||||
-----------------------------------------------------------
|
|
||||||
|
|
||||||
-- alternative:
|
|
||||||
-- precedence as inherent feature
|
|
||||||
|
|
||||||
oper TermWithPrec = {s : Str ; p : Prec} ;
|
|
||||||
|
|
||||||
oper
|
|
||||||
mkpPrec : Str -> Prec -> TermWithPrec =
|
|
||||||
\f -> \p ->
|
|
||||||
{s = f ; p = p} ;
|
|
||||||
|
|
||||||
usePrec : TermWithPrec -> Prec -> Str =
|
|
||||||
\x -> \p ->
|
|
||||||
mkPrec x.s ! x.p ! p ;
|
|
||||||
|
|
||||||
-- make a string into a constant, of precedence p4
|
|
||||||
mkpConst : Str -> TermWithPrec =
|
|
||||||
\f ->
|
|
||||||
mkpPrec f p4 ;
|
|
||||||
|
|
||||||
-- make a string into a 1/2/3 -place prefix operator, of precedence p3
|
|
||||||
mkpFun1 : Str -> TermWithPrec -> TermWithPrec =
|
|
||||||
\f -> \x ->
|
|
||||||
mkpPrec (f ++ usePrec x p4) p3 ;
|
|
||||||
|
|
||||||
mkpFun2 : Str -> TermWithPrec -> TermWithPrec -> TermWithPrec =
|
|
||||||
\f -> \x -> \y ->
|
|
||||||
mkpPrec (f ++ usePrec x p4 ++ usePrec y p4) p3 ;
|
|
||||||
|
|
||||||
mkpFun3 : Str -> TermWithPrec -> TermWithPrec -> TermWithPrec -> TermWithPrec =
|
|
||||||
\f -> \x -> \y -> \z ->
|
|
||||||
mkpPrec (f ++ usePrec x p4 ++ usePrec y p4 ++ usePrec z p4) p3 ;
|
|
||||||
|
|
||||||
-- make a string into a non/left/right -associative infix operator, of precedence p
|
|
||||||
mkpInfix : Str -> Prec -> TermWithPrec -> TermWithPrec -> TermWithPrec =
|
|
||||||
\f -> \p -> \x -> \y ->
|
|
||||||
mkpPrec (usePrec x (nextPrec ! p) ++ f ++ usePrec y (nextPrec ! p)) p ;
|
|
||||||
mkpInfixL : Str -> Prec -> TermWithPrec -> TermWithPrec -> TermWithPrec =
|
|
||||||
\f -> \p -> \x -> \y ->
|
|
||||||
mkpPrec (usePrec x p ++ f ++ usePrec y (nextPrec ! p)) p ;
|
|
||||||
mkpInfixR : Str -> Prec -> TermWithPrec -> TermWithPrec -> TermWithPrec =
|
|
||||||
\f -> \p -> \x -> \y ->
|
|
||||||
mkpPrec (usePrec x (nextPrec ! p) ++ f ++ usePrec y p) p ;
|
|
||||||
} ;
|
|
||||||
@@ -1,37 +0,0 @@
|
|||||||
--1 Predefined functions for concrete syntax
|
|
||||||
|
|
||||||
-- The definitions of these constants are hard-coded in GF, and defined
|
|
||||||
-- in [AppPredefined.hs ../src/GF/Grammar/AppPredefined.hs]. Applying
|
|
||||||
-- them to run-time variables leads to compiler errors that are often
|
|
||||||
-- only detected at the code generation time.
|
|
||||||
|
|
||||||
resource Predef = {
|
|
||||||
|
|
||||||
-- This type of booleans is for internal use only.
|
|
||||||
|
|
||||||
param PBool = PTrue | PFalse ;
|
|
||||||
|
|
||||||
oper Error : Type = variants {} ; -- the empty type
|
|
||||||
oper Int : Type = variants {} ; -- the type of integers
|
|
||||||
oper Ints : Int -> Type = variants {} ; -- the type of integers from 0 to n
|
|
||||||
|
|
||||||
oper error : Str -> Error = variants {} ; -- forms error message
|
|
||||||
oper length : Tok -> Int = variants {} ; -- length of string
|
|
||||||
oper drop : Int -> Tok -> Tok = variants {} ; -- drop prefix of length
|
|
||||||
oper take : Int -> Tok -> Tok = variants {} ; -- take prefix of length
|
|
||||||
oper tk : Int -> Tok -> Tok = variants {} ; -- drop suffix of length
|
|
||||||
oper dp : Int -> Tok -> Tok = variants {} ; -- take suffix of length
|
|
||||||
oper eqInt : Int -> Int -> PBool = variants {} ; -- test if equal integers
|
|
||||||
oper lessInt: Int -> Int -> PBool = variants {} ; -- test order of integers
|
|
||||||
oper plus : Int -> Int -> Int = variants {} ; -- add integers
|
|
||||||
oper eqStr : Tok -> Tok -> PBool = variants {} ; -- test if equal strings
|
|
||||||
oper occur : Tok -> Tok -> PBool = variants {} ; -- test if occurs as substring
|
|
||||||
oper occurs : Tok -> Tok -> PBool = variants {} ; -- test if any char occurs
|
|
||||||
oper show : (P : Type) -> P -> Tok = variants {} ; -- convert param to string
|
|
||||||
oper read : (P : Type) -> Tok -> P = variants {} ; -- convert string to param
|
|
||||||
oper toStr : (L : Type) -> L -> Str = variants {} ; -- find the "first" string
|
|
||||||
oper mapStr : (L : Type) -> (Str -> Str) -> L -> L = variants {} ;
|
|
||||||
-- map all strings in a data structure; experimental ---
|
|
||||||
|
|
||||||
} ;
|
|
||||||
|
|
||||||
@@ -1,4 +0,0 @@
|
|||||||
abstract PredefAbs = {
|
|
||||||
cat Int ; String ; Float ;
|
|
||||||
} ;
|
|
||||||
|
|
||||||
@@ -1,4 +0,0 @@
|
|||||||
concrete PredefCnc of PredefAbs = {
|
|
||||||
lincat
|
|
||||||
Int, Float, String = {s : Str} ;
|
|
||||||
} ;
|
|
||||||
@@ -1,142 +0,0 @@
|
|||||||
--1 The GF Prelude
|
|
||||||
|
|
||||||
-- This file defines some prelude facilities usable in all grammars.
|
|
||||||
|
|
||||||
resource Prelude = open (Predef=Predef) in {
|
|
||||||
|
|
||||||
oper
|
|
||||||
|
|
||||||
--2 Strings, records, and tables
|
|
||||||
|
|
||||||
SS : Type = {s : Str} ;
|
|
||||||
ss : Str -> SS = \s -> {s = s} ;
|
|
||||||
ss2 : (_,_ : Str) -> SS = \x,y -> ss (x ++ y) ;
|
|
||||||
ss3 : (_,_ ,_: Str) -> SS = \x,y,z -> ss (x ++ y ++ z) ;
|
|
||||||
|
|
||||||
cc2 : (_,_ : SS) -> SS = \x,y -> ss (x.s ++ y.s) ;
|
|
||||||
cc3 : (_,_,_ : SS) -> SS = \x,y,z -> ss (x.s ++ y.s ++ z.s) ;
|
|
||||||
|
|
||||||
SS1 : Type -> Type = \P -> {s : P => Str} ;
|
|
||||||
ss1 : (A : Type) -> Str -> SS1 A = \A,s -> {s = table {_ => s}} ;
|
|
||||||
|
|
||||||
SP1 : Type -> Type = \P -> {s : Str ; p : P} ;
|
|
||||||
sp1 : (A : Type) -> Str -> A -> SP1 A = \_,s,a -> {s = s ; p = a} ;
|
|
||||||
|
|
||||||
constTable : (A,B : Type) -> B -> A => B = \_,_,b -> \\_ => b ;
|
|
||||||
constStr : (A : Type) -> Str -> A => Str = \A -> constTable A Str ;
|
|
||||||
|
|
||||||
-- Discontinuous constituents.
|
|
||||||
|
|
||||||
SD2 : Type = {s1,s2 : Str} ;
|
|
||||||
sd2 : (_,_ : Str) -> SD2 = \x,y -> {s1 = x ; s2 = y} ;
|
|
||||||
|
|
||||||
|
|
||||||
--2 Optional elements
|
|
||||||
|
|
||||||
-- Missing form.
|
|
||||||
|
|
||||||
nonExist : Str = variants {} ;
|
|
||||||
|
|
||||||
-- Optional string with preference on the string vs. empty.
|
|
||||||
|
|
||||||
optStr : Str -> Str = \s -> variants {s ; []} ;
|
|
||||||
strOpt : Str -> Str = \s -> variants {[] ; s} ;
|
|
||||||
|
|
||||||
-- Free order between two strings.
|
|
||||||
|
|
||||||
bothWays : Str -> Str -> Str = \x,y -> variants {x ++ y ; y ++ x} ;
|
|
||||||
|
|
||||||
-- Parametric order between two strings.
|
|
||||||
|
|
||||||
preOrPost : Bool -> Str -> Str -> Str = \pr,x,y ->
|
|
||||||
if_then_Str pr (x ++ y) (y ++ x) ;
|
|
||||||
|
|
||||||
--2 Infixes, prefixes, and postfixes
|
|
||||||
|
|
||||||
-- Fixes with precedences are defined in [Precedence Precedence.html].
|
|
||||||
|
|
||||||
infixSS : Str -> SS -> SS -> SS = \f,x,y -> ss (x.s ++ f ++ y.s) ;
|
|
||||||
prefixSS : Str -> SS -> SS = \f,x -> ss (f ++ x.s) ;
|
|
||||||
postfixSS : Str -> SS -> SS = \f,x -> ss (x.s ++ f) ;
|
|
||||||
embedSS : Str -> Str -> SS -> SS = \f,g,x -> ss (f ++ x.s ++ g) ;
|
|
||||||
|
|
||||||
|
|
||||||
--2 Booleans
|
|
||||||
|
|
||||||
param Bool = True | False ;
|
|
||||||
|
|
||||||
oper
|
|
||||||
if_then_else : (A : Type) -> Bool -> A -> A -> A = \_,c,d,e ->
|
|
||||||
case c of {
|
|
||||||
True => d ; ---- should not need to qualify
|
|
||||||
False => e
|
|
||||||
} ;
|
|
||||||
|
|
||||||
andB : (_,_ : Bool) -> Bool = \a,b -> if_then_else Bool a b False ;
|
|
||||||
orB : (_,_ : Bool) -> Bool = \a,b -> if_then_else Bool a True b ;
|
|
||||||
notB : Bool -> Bool = \a -> if_then_else Bool a False True ;
|
|
||||||
|
|
||||||
if_then_Str : Bool -> Str -> Str -> Str = if_then_else Str ;
|
|
||||||
|
|
||||||
onlyIf : Bool -> Str -> Str = \b,s -> case b of {
|
|
||||||
True => s ;
|
|
||||||
_ => nonExist
|
|
||||||
} ;
|
|
||||||
|
|
||||||
-- Interface to internal booleans
|
|
||||||
|
|
||||||
pbool2bool : Predef.PBool -> Bool = \b -> case b of {
|
|
||||||
Predef.PFalse => False ; Predef.PTrue => True
|
|
||||||
} ;
|
|
||||||
|
|
||||||
init : Tok -> Tok = Predef.tk 1 ;
|
|
||||||
last : Tok -> Tok = Predef.dp 1 ;
|
|
||||||
|
|
||||||
--2 High-level access to Predef operations
|
|
||||||
|
|
||||||
isNil : Tok -> Bool = \b -> pbool2bool (Predef.eqStr [] b) ;
|
|
||||||
|
|
||||||
ifTok : (A : Type) -> Tok -> Tok -> A -> A -> A = \A,t,u,a,b ->
|
|
||||||
case Predef.eqStr t u of {Predef.PTrue => a ; Predef.PFalse => b} ;
|
|
||||||
|
|
||||||
--2 Lexer-related operations
|
|
||||||
|
|
||||||
-- Bind together two tokens in some lexers, either obligatorily or optionally
|
|
||||||
|
|
||||||
oper
|
|
||||||
glue : Str -> Str -> Str = \x,y -> x ++ BIND ++ y ;
|
|
||||||
glueOpt : Str -> Str -> Str = \x,y -> variants {glue x y ; x ++ y} ;
|
|
||||||
noglueOpt : Str -> Str -> Str = \x,y -> variants {x ++ y ; glue x y} ;
|
|
||||||
|
|
||||||
-- Force capitalization of next word in some unlexers
|
|
||||||
|
|
||||||
capitalize : Str -> Str = \s -> CAPIT ++ s ;
|
|
||||||
|
|
||||||
-- These should be hidden, and never changed since they are hardcoded in (un)lexers
|
|
||||||
|
|
||||||
BIND : Str = "&+" ;
|
|
||||||
PARA : Str = "&-" ;
|
|
||||||
CAPIT : Str = "&|" ;
|
|
||||||
|
|
||||||
--2 Miscellaneous
|
|
||||||
|
|
||||||
-- Identity function
|
|
||||||
|
|
||||||
id : (A : Type) -> A -> A = \_,a -> a ;
|
|
||||||
|
|
||||||
-- Parentheses
|
|
||||||
|
|
||||||
paren : Str -> Str = \s -> "(" ++ s ++ ")" ;
|
|
||||||
parenss : SS -> SS = \s -> ss (paren s.s) ;
|
|
||||||
|
|
||||||
-- Zero, one, two, or more (elements in a list etc)
|
|
||||||
|
|
||||||
param
|
|
||||||
ENumber = E0 | E1 | E2 | Emore ;
|
|
||||||
|
|
||||||
oper
|
|
||||||
eNext : ENumber -> ENumber = \e -> case e of {
|
|
||||||
E0 => E1 ; E1 => E2 ; _ => Emore} ;
|
|
||||||
|
|
||||||
|
|
||||||
}
|
|
||||||
@@ -1,193 +0,0 @@
|
|||||||
module Main where
|
|
||||||
|
|
||||||
import Control.Monad
|
|
||||||
import Data.Maybe
|
|
||||||
import System.Cmd
|
|
||||||
import System.Directory
|
|
||||||
import System.Environment
|
|
||||||
import System.Exit
|
|
||||||
|
|
||||||
-- Make commands for compiling and testing resource grammars.
|
|
||||||
-- usage: runghc Make ((present? OPT?) | (clone FILE))? LANGS?
|
|
||||||
-- where
|
|
||||||
-- - OPT = (lang | compat | api | math | pgf | test | demo | clean)
|
|
||||||
-- - LANGS has the form e.g. langs=Eng,Fin,Rus
|
|
||||||
-- - clone with a flag file=FILENAME clones the file to the specified languages,
|
|
||||||
-- by replacing the 3-letter language name of the original in both
|
|
||||||
-- the filename and the body
|
|
||||||
-- with each name in the list (default: all languages)
|
|
||||||
-- With no argument, lang, compat, api and math are done, in this order.
|
|
||||||
-- See 'make' below for what is done by which command.
|
|
||||||
|
|
||||||
default_gf = "../../dist/build/gf/gf"
|
|
||||||
|
|
||||||
presApiPath = "-path=api:present"
|
|
||||||
|
|
||||||
-- the languages have long directory names and short ISO codes (3 letters)
|
|
||||||
-- we also give the decodings for postprocessing linearizations, as long as grammars
|
|
||||||
-- don't support all flags needed; they are used in tests
|
|
||||||
|
|
||||||
langsCoding = [
|
|
||||||
(("arabic", "Ara"),""),
|
|
||||||
(("bulgarian","Bul"),""),
|
|
||||||
(("catalan", "Cat"),""),
|
|
||||||
(("danish", "Dan"),""),
|
|
||||||
(("english", "Eng"),""),
|
|
||||||
(("finnish", "Fin"),""),
|
|
||||||
(("french", "Fre"),""),
|
|
||||||
(("hindi", "Hin"),"to_devanagari"),
|
|
||||||
(("german", "Ger"),""),
|
|
||||||
(("interlingua","Ina"),""),
|
|
||||||
(("italian", "Ita"),""),
|
|
||||||
(("norwegian","Nor"),""),
|
|
||||||
(("russian", "Rus"),""),
|
|
||||||
(("spanish", "Spa"),""),
|
|
||||||
(("swedish", "Swe"),""),
|
|
||||||
(("thai", "Tha"),"to_thai")
|
|
||||||
]
|
|
||||||
|
|
||||||
langs = map fst langsCoding
|
|
||||||
|
|
||||||
-- languages for which to compile Lang
|
|
||||||
langsLang = langs `except` ["Ara"]
|
|
||||||
|
|
||||||
-- languages for which to compile Try
|
|
||||||
langsAPI = langsLang `except` ["Ara","Bul","Hin","Ina","Rus","Tha"]
|
|
||||||
|
|
||||||
-- languages for which to compile Mathematical
|
|
||||||
langsMath = langsAPI
|
|
||||||
|
|
||||||
-- languages for which to run treebank test
|
|
||||||
langsTest = langsLang `except` ["Ara","Bul","Cat","Hin","Rus","Spa","Tha"]
|
|
||||||
|
|
||||||
-- languages for which to run demo test
|
|
||||||
langsDemo = langsLang `except` ["Ara","Hin","Ina","Tha"]
|
|
||||||
|
|
||||||
-- languages for which langs.pgf is built
|
|
||||||
langsPGF = langsTest `only` ["Eng","Fre","Swe"]
|
|
||||||
|
|
||||||
-- languages for which Compatibility exists (to be extended)
|
|
||||||
langsCompat = langsLang `only` ["Cat","Eng","Fin","Fre","Ita","Spa","Swe"]
|
|
||||||
|
|
||||||
treebankExx = "exx-resource.gft"
|
|
||||||
treebankResults = "exx-resource.gftb"
|
|
||||||
|
|
||||||
main = do
|
|
||||||
xx <- getArgs
|
|
||||||
make xx
|
|
||||||
|
|
||||||
make :: [String] -> IO ()
|
|
||||||
make xx = do
|
|
||||||
let ifx opt act = if null xx || elem opt xx then act >> return () else return ()
|
|
||||||
let ifxx opt act = if elem opt xx then act >> return () else return ()
|
|
||||||
let pres = elem "present" xx
|
|
||||||
let dir = if pres then "../present" else "../alltenses"
|
|
||||||
|
|
||||||
let optl ls = maybe ls id $ getOptLangs xx
|
|
||||||
|
|
||||||
ifx "lang" $ do
|
|
||||||
mapM_ (gfc pres [] . lang) (optl langsLang)
|
|
||||||
copy "*/*.gfo" dir
|
|
||||||
ifx "compat" $ do
|
|
||||||
mapM_ (gfc pres [] . compat) (optl langsCompat)
|
|
||||||
copy "*/Compatibility*.gfo" dir
|
|
||||||
ifx "api" $ do
|
|
||||||
mapM_ (gfc pres presApiPath . try) (optl langsAPI)
|
|
||||||
copy "*/*.gfo" dir
|
|
||||||
ifx "math" $ do
|
|
||||||
mapM_ (gfc False [] . math) (optl langsMath)
|
|
||||||
copy "mathematical/*.gfo" "../mathematical"
|
|
||||||
mapM_ (gfc False [] . symbolic) (optl langsMath)
|
|
||||||
copy "mathematical/Symbolic*.gfo" "../mathematical"
|
|
||||||
ifxx "pgf" $ do
|
|
||||||
run_gfc $ ["-s","--make","--name=langs","--parser=off",
|
|
||||||
"--output-dir=" ++ dir]
|
|
||||||
++ [dir ++ "/Lang" ++ la ++ ".gfo" | (_,la) <- optl langsPGF]
|
|
||||||
ifxx "test" $ do
|
|
||||||
let ls = optl langsTest
|
|
||||||
gf (treeb "Lang" ls) $ unwords [dir ++ "/Lang" ++ la ++ ".gfo" | (_,la) <- ls]
|
|
||||||
ifxx "demo" $ do
|
|
||||||
let ls = optl langsDemo
|
|
||||||
gf (demos "Demo" ls) $ unwords ["demo/Demo" ++ la ++ ".gf" | (_,la) <- ls]
|
|
||||||
ifxx "clean" $ do
|
|
||||||
system "rm -f */*.gfo ../alltenses/*.gfo ../present/*.gfo"
|
|
||||||
ifxx "clone" $ do
|
|
||||||
let (pref,lang) = case getLangName xx of
|
|
||||||
Just pl -> pl
|
|
||||||
_ -> error "expected flag option file=ppppppLLL.gf"
|
|
||||||
s <- readFile (pref ++ lang ++ ".gf")
|
|
||||||
mapM_ (\la -> writeFile (pref ++ la ++ ".gf") (replaceLang lang la s)) (map snd (optl langs))
|
|
||||||
return ()
|
|
||||||
|
|
||||||
gfc pres ppath file = do
|
|
||||||
let preproc = if pres then "-preproc=./mkPresent" else ""
|
|
||||||
let path = if pres then ppath else ""
|
|
||||||
putStrLn $ "Compiling " ++ file
|
|
||||||
run_gfc ["-s","-src", preproc, path, file]
|
|
||||||
|
|
||||||
gf comm file = do
|
|
||||||
putStrLn $ "Reading " ++ file
|
|
||||||
let cmd = "echo \"" ++ comm ++ "\" | gf -s " ++ file
|
|
||||||
putStrLn cmd
|
|
||||||
system cmd
|
|
||||||
|
|
||||||
treeb abstr ls = "rf -lines -tree -file=" ++ treebankExx ++
|
|
||||||
" | l -treebank " ++ unlexer abstr ls ++ " | wf -file=" ++ treebankResults
|
|
||||||
|
|
||||||
demos abstr ls = "gr -number=100 | l -treebank " ++ unlexer abstr ls ++
|
|
||||||
" | ps -to_html | wf -file=resdemo.html"
|
|
||||||
|
|
||||||
lang (lla,la) = lla ++ "/All" ++ la ++ ".gf"
|
|
||||||
compat (lla,la) = lla ++ "/Compatibility" ++ la ++ ".gf"
|
|
||||||
try (lla,la) = "api/Try" ++ la ++ ".gf"
|
|
||||||
math (lla,la) = "mathematical/Mathematical" ++ la ++ ".gf"
|
|
||||||
symbolic (lla,la) = "mathematical/Symbolic" ++ la ++ ".gf"
|
|
||||||
|
|
||||||
except ls es = filter (flip notElem es . snd) ls
|
|
||||||
only ls es = filter (flip elem es . snd) ls
|
|
||||||
|
|
||||||
-- list of languages overriding the definitions above
|
|
||||||
getOptLangs args = case [ls | a <- args, let (f,ls) = splitAt 6 a, f=="langs="] of
|
|
||||||
ls:_ -> return $ findLangs $ seps ls
|
|
||||||
_ -> Nothing
|
|
||||||
where
|
|
||||||
seps = words . map (\c -> if c==',' then ' ' else c)
|
|
||||||
findLangs ls = [lang | lang@(_,la) <- langs, elem la ls]
|
|
||||||
|
|
||||||
-- the file name has the form p....pLLL.gf, i.e. 3-letter lang name, suffix .gf
|
|
||||||
getLangName args = case [ls | a <- args, let (f,ls) = splitAt 5 a, f=="file="] of
|
|
||||||
fi:_ -> let (nal,ferp) = splitAt 3 (drop 3 (reverse fi)) in return (reverse ferp,reverse nal)
|
|
||||||
_ -> Nothing
|
|
||||||
|
|
||||||
replaceLang s1 s2 = repl where
|
|
||||||
repl s = case s of
|
|
||||||
c:cs -> case splitAt lgs s of
|
|
||||||
(pre,rest) | pre == s1 -> s2 ++ repl rest
|
|
||||||
_ -> c : repl cs
|
|
||||||
_ -> s
|
|
||||||
lgs = 3 -- length s1
|
|
||||||
|
|
||||||
unlexer abstr ls =
|
|
||||||
"-unlexer=\\\"" ++ unwords
|
|
||||||
[abstr ++ la ++ "=" ++ unl |
|
|
||||||
lla@(_,la) <- ls, let unl = unlex lla, not (null unl)] ++
|
|
||||||
"\\\""
|
|
||||||
where
|
|
||||||
unlex lla = maybe "" id $ lookup lla langsCoding
|
|
||||||
|
|
||||||
-- | Runs the gf executable in compile mode with the given arguments.
|
|
||||||
run_gfc :: [String] -> IO ()
|
|
||||||
run_gfc args =
|
|
||||||
do let args' = ["-batch","-gf-lib-path=../"] ++ filter (not . null) args ++ ["+RTS"] ++ rts_flags ++ ["-RTS"]
|
|
||||||
putStrLn $ "Running: " ++ default_gf ++ " " ++ unwords (map showArg args')
|
|
||||||
e <- rawSystem default_gf args'
|
|
||||||
case e of
|
|
||||||
ExitSuccess -> return ()
|
|
||||||
ExitFailure i -> putStrLn $ "gf exited with exit code: " ++ show i
|
|
||||||
where rts_flags = ["-K100M"]
|
|
||||||
showArg arg = "'" ++ arg ++ "'"
|
|
||||||
|
|
||||||
copy :: String -> String -> IO ()
|
|
||||||
copy from to =
|
|
||||||
do system $ "cp " ++ from ++ " " ++ to
|
|
||||||
return ()
|
|
||||||
@@ -1,49 +0,0 @@
|
|||||||
RUNGHC=runghc
|
|
||||||
RUNMAKE=$(RUNGHC) Make.hs
|
|
||||||
|
|
||||||
.PHONY: all present alltenses lang api math compat test demo synopsis link compiled clean
|
|
||||||
|
|
||||||
all: link present alltenses math
|
|
||||||
|
|
||||||
present:
|
|
||||||
$(RUNMAKE) present lang
|
|
||||||
$(RUNMAKE) present api
|
|
||||||
|
|
||||||
alltenses:
|
|
||||||
$(RUNMAKE) lang
|
|
||||||
$(RUNMAKE) api
|
|
||||||
|
|
||||||
lang:
|
|
||||||
$(RUNMAKE) lang
|
|
||||||
|
|
||||||
api:
|
|
||||||
$(RUNMAKE) api
|
|
||||||
|
|
||||||
math:
|
|
||||||
$(RUNMAKE) math
|
|
||||||
|
|
||||||
compat:
|
|
||||||
gf -batch */Compatibility*.gf
|
|
||||||
cp -p */Compatibility*.gfo ../alltenses/
|
|
||||||
cp -p */Compatibility*.gfo ../present/
|
|
||||||
# $(RUNMAKE) present compat
|
|
||||||
# $(RUNMAKE) compat
|
|
||||||
|
|
||||||
test:
|
|
||||||
$(RUNMAKE) test
|
|
||||||
|
|
||||||
demo:
|
|
||||||
$(RUNMAKE) demo
|
|
||||||
|
|
||||||
synopsis:
|
|
||||||
cd doc ; $(RUNGHC) MkSynopsis ; cd ..
|
|
||||||
|
|
||||||
link:
|
|
||||||
chmod a+x mkPresent
|
|
||||||
|
|
||||||
compiled:
|
|
||||||
(cd .. && tar -zcf resource-compiled.tar.gz prelude alltenses present mathematical)
|
|
||||||
|
|
||||||
clean:
|
|
||||||
$(RUNMAKE) clean
|
|
||||||
|
|
||||||
@@ -1,30 +0,0 @@
|
|||||||
--1 Adjective: Adjectives and Adjectival Phrases
|
|
||||||
|
|
||||||
abstract Adjective = Cat ** {
|
|
||||||
|
|
||||||
fun
|
|
||||||
|
|
||||||
-- The principal ways of forming an adjectival phrase are
|
|
||||||
-- positive, comparative, relational, reflexive-relational, and
|
|
||||||
-- elliptic-relational.
|
|
||||||
-- (The superlative use is covered in [Noun Noun.html].$SuperlA$.)
|
|
||||||
|
|
||||||
PositA : A -> AP ; -- warm
|
|
||||||
ComparA : A -> NP -> AP ; -- warmer than I
|
|
||||||
ComplA2 : A2 -> NP -> AP ; -- married to her
|
|
||||||
ReflA2 : A2 -> AP ; -- married to itself
|
|
||||||
UseA2 : A2 -> A ; -- married
|
|
||||||
|
|
||||||
-- Sentence and question complements defined for all adjectival
|
|
||||||
-- phrases, although the semantics is only clear for some adjectives.
|
|
||||||
|
|
||||||
SentAP : AP -> SC -> AP ; -- good that she is here
|
|
||||||
|
|
||||||
-- An adjectival phrase can be modified by an *adadjective*, such as "very".
|
|
||||||
|
|
||||||
AdAP : AdA -> AP -> AP ; -- very warm
|
|
||||||
|
|
||||||
-- The formation of adverbs from adjective (e.g. "quickly") is covered
|
|
||||||
-- in [Adverb Adverb.html].
|
|
||||||
|
|
||||||
}
|
|
||||||
@@ -1,32 +0,0 @@
|
|||||||
--1 Adverb: Adverbs and Adverbial Phrases
|
|
||||||
|
|
||||||
abstract Adverb = Cat ** {
|
|
||||||
|
|
||||||
fun
|
|
||||||
|
|
||||||
-- The two main ways of forming adverbs are from adjectives and by
|
|
||||||
-- prepositions from noun phrases.
|
|
||||||
|
|
||||||
PositAdvAdj : A -> Adv ; -- warmly
|
|
||||||
PrepNP : Prep -> NP -> Adv ; -- in the house
|
|
||||||
|
|
||||||
-- Comparative adverbs have a noun phrase or a sentence as object of
|
|
||||||
-- comparison.
|
|
||||||
|
|
||||||
ComparAdvAdj : CAdv -> A -> NP -> Adv ; -- more warmly than John
|
|
||||||
ComparAdvAdjS : CAdv -> A -> S -> Adv ; -- more warmly than he runs
|
|
||||||
|
|
||||||
-- Adverbs can be modified by 'adadjectives', just like adjectives.
|
|
||||||
|
|
||||||
AdAdv : AdA -> Adv -> Adv ; -- very quickly
|
|
||||||
|
|
||||||
-- Subordinate clauses can function as adverbs.
|
|
||||||
|
|
||||||
SubjS : Subj -> S -> Adv ; -- when she sleeps
|
|
||||||
|
|
||||||
-- Comparison adverbs also work as numeral adverbs.
|
|
||||||
|
|
||||||
AdnCAdv : CAdv -> AdN ; -- less (than five)
|
|
||||||
|
|
||||||
|
|
||||||
}
|
|
||||||
@@ -1,63 +0,0 @@
|
|||||||
--1 Obsolete constructs included for backward-compatibility
|
|
||||||
|
|
||||||
abstract Backward = Cat ** {
|
|
||||||
|
|
||||||
|
|
||||||
-- from Cat
|
|
||||||
|
|
||||||
cat
|
|
||||||
Slash ;
|
|
||||||
|
|
||||||
fun
|
|
||||||
|
|
||||||
-- from Verb 19/4/2008
|
|
||||||
|
|
||||||
ComplV2 : V2 -> NP -> VP ; -- use it
|
|
||||||
ComplV3 : V3 -> NP -> NP -> VP ; -- send a message to her
|
|
||||||
ComplV2V : V2V -> NP -> VP -> VP ; -- cause it to burn
|
|
||||||
ComplV2S : V2S -> NP -> S -> VP ; -- tell me that it rains
|
|
||||||
ComplV2Q : V2Q -> NP -> QS -> VP ; -- ask me who came
|
|
||||||
ComplV2A : V2A -> NP -> AP -> VP ; -- paint it red
|
|
||||||
|
|
||||||
ReflV2 : V2 -> VP ; -- use itself
|
|
||||||
|
|
||||||
UseVQ : VQ -> V2 ; -- ask (a question)
|
|
||||||
UseVS : VS -> V2 ; -- know (a secret)
|
|
||||||
|
|
||||||
|
|
||||||
-- from Sentence 19/4/2008
|
|
||||||
|
|
||||||
SlashV2 : NP -> V2 -> Slash ; -- (whom) he sees
|
|
||||||
SlashVVV2 : NP -> VV -> V2 -> Slash; -- (whom) he wants to see
|
|
||||||
|
|
||||||
-- from Noun 19/4/2008
|
|
||||||
|
|
||||||
NumInt : Int -> Num ; -- 51
|
|
||||||
OrdInt : Int -> Ord ; -- 51st (DEPRECATED)
|
|
||||||
NoOrd : Ord ;
|
|
||||||
|
|
||||||
-- 20/4
|
|
||||||
DetSg : Quant -> Ord -> Det ; -- the best man
|
|
||||||
DetPl : Quant -> Num -> Ord -> Det ; -- the five best men
|
|
||||||
NoNum : Num ;
|
|
||||||
|
|
||||||
-- 22/4
|
|
||||||
DefArt : Quant ; -- the (house), the (houses)
|
|
||||||
IndefArt : Quant ; -- a (house), (houses)
|
|
||||||
MassDet : Quant ; -- (beer)
|
|
||||||
|
|
||||||
-- from Structural 19/4/2008
|
|
||||||
|
|
||||||
that_NP : NP ;
|
|
||||||
these_NP : NP ;
|
|
||||||
this_NP : NP ;
|
|
||||||
those_NP : NP ;
|
|
||||||
|
|
||||||
whichPl_IDet : IDet ;
|
|
||||||
whichSg_IDet : IDet ;
|
|
||||||
|
|
||||||
-- from Adverb
|
|
||||||
|
|
||||||
AdvSC : SC -> Adv ; -- that he arrives ---- REMOVE?
|
|
||||||
|
|
||||||
}
|
|
||||||
@@ -1,134 +0,0 @@
|
|||||||
--1 Cat: the Category System
|
|
||||||
|
|
||||||
-- The category system is central to the library in the sense
|
|
||||||
-- that the other modules ($Adjective$, $Adverb$, $Noun$, $Verb$ etc)
|
|
||||||
-- communicate through it. This means that e.g. a function using
|
|
||||||
-- $NP$s in $Verb$ need not know how $NP$s are constructed in $Noun$:
|
|
||||||
-- it is enough that both $Verb$ and $Noun$ use the same type $NP$,
|
|
||||||
-- which is given here in $Cat$.
|
|
||||||
--
|
|
||||||
-- Some categories are inherited from [``Common`` Common.html].
|
|
||||||
-- The reason they are defined there is that they have the same
|
|
||||||
-- implementation in all languages in the resource (typically,
|
|
||||||
-- just a string). These categories are
|
|
||||||
-- $AdA, AdN, AdV, Adv, Ant, CAdv, IAdv, PConj, Phr$,
|
|
||||||
-- $Pol, SC, Tense, Text, Utt, Voc$.
|
|
||||||
--
|
|
||||||
-- Moreover, the list categories $ListAdv, ListAP, ListNP, ListS$
|
|
||||||
-- are defined on $Conjunction$ and only used locally there.
|
|
||||||
|
|
||||||
|
|
||||||
abstract Cat = Common ** {
|
|
||||||
|
|
||||||
cat
|
|
||||||
|
|
||||||
--2 Sentences and clauses
|
|
||||||
|
|
||||||
-- Constructed in [Sentence Sentence.html], and also in
|
|
||||||
-- [Idiom Idiom.html].
|
|
||||||
|
|
||||||
S ; -- declarative sentence e.g. "she lived here"
|
|
||||||
QS ; -- question e.g. "where did she live"
|
|
||||||
RS ; -- relative e.g. "in which she lived"
|
|
||||||
Cl ; -- declarative clause, with all tenses e.g. "she looks at this"
|
|
||||||
ClSlash;-- clause missing NP (S/NP in GPSG) e.g. "she looks at"
|
|
||||||
SSlash ;-- sentence missing NP e.g. "she has looked at"
|
|
||||||
Imp ; -- imperative e.g. "look at this"
|
|
||||||
|
|
||||||
--2 Questions and interrogatives
|
|
||||||
|
|
||||||
-- Constructed in [Question Question.html].
|
|
||||||
|
|
||||||
QCl ; -- question clause, with all tenses e.g. "why does she walk"
|
|
||||||
IP ; -- interrogative pronoun e.g. "who"
|
|
||||||
IComp ; -- interrogative complement of copula e.g. "where"
|
|
||||||
IDet ; -- interrogative determiner e.g. "how many"
|
|
||||||
IQuant; -- interrogative quantifier e.g. "which"
|
|
||||||
|
|
||||||
--2 Relative clauses and pronouns
|
|
||||||
|
|
||||||
-- Constructed in [Relative Relative.html].
|
|
||||||
|
|
||||||
RCl ; -- relative clause, with all tenses e.g. "in which she lives"
|
|
||||||
RP ; -- relative pronoun e.g. "in which"
|
|
||||||
|
|
||||||
--2 Verb phrases
|
|
||||||
|
|
||||||
-- Constructed in [Verb Verb.html].
|
|
||||||
|
|
||||||
VP ; -- verb phrase e.g. "is very warm"
|
|
||||||
Comp ; -- complement of copula, such as AP e.g. "very warm"
|
|
||||||
VPSlash ; -- verb phrase missing complement e.g. "give to John"
|
|
||||||
|
|
||||||
--2 Adjectival phrases
|
|
||||||
|
|
||||||
-- Constructed in [Adjective Adjective.html].
|
|
||||||
|
|
||||||
AP ; -- adjectival phrase e.g. "very warm"
|
|
||||||
|
|
||||||
--2 Nouns and noun phrases
|
|
||||||
|
|
||||||
-- Constructed in [Noun Noun.html].
|
|
||||||
-- Many atomic noun phrases e.g. "everybody"
|
|
||||||
-- are constructed in [Structural Structural.html].
|
|
||||||
-- The determiner structure is
|
|
||||||
-- ``` Predet (QuantSg | QuantPl Num) Ord
|
|
||||||
-- as defined in [Noun Noun.html].
|
|
||||||
|
|
||||||
CN ; -- common noun (without determiner) e.g. "red house"
|
|
||||||
NP ; -- noun phrase (subject or object) e.g. "the red house"
|
|
||||||
Pron ; -- personal pronoun e.g. "she"
|
|
||||||
Det ; -- determiner phrase e.g. "those seven"
|
|
||||||
Predet ; -- predeterminer (prefixed Quant) e.g. "all"
|
|
||||||
Quant ; -- quantifier ('nucleus' of Det) e.g. "this/these"
|
|
||||||
Art ; -- article e.g. "the"
|
|
||||||
Num ; -- number determining element e.g. "seven"
|
|
||||||
Card ; -- cardinal number e.g. "seven"
|
|
||||||
Ord ; -- ordinal number (used in Det) e.g. "seventh"
|
|
||||||
|
|
||||||
--2 Numerals
|
|
||||||
|
|
||||||
-- Constructed in [Numeral Numeral.html].
|
|
||||||
|
|
||||||
Numeral ; -- cardinal or ordinal in words e.g. "five/fifth"
|
|
||||||
Digits ; -- cardinal or ordinal in digits e.g. "1,000/1,000th"
|
|
||||||
|
|
||||||
--2 Structural words
|
|
||||||
|
|
||||||
-- Constructed in [Structural Structural.html].
|
|
||||||
|
|
||||||
Conj ; -- conjunction e.g. "and"
|
|
||||||
---b DConj ; -- distributed conjunction e.g. "both - and"
|
|
||||||
Subj ; -- subjunction e.g. "if"
|
|
||||||
Prep ; -- preposition, or just case e.g. "in"
|
|
||||||
|
|
||||||
--2 Words of open classes
|
|
||||||
|
|
||||||
-- These are constructed in [Lexicon Lexicon.html] and in
|
|
||||||
-- additional lexicon modules.
|
|
||||||
|
|
||||||
V ; -- one-place verb e.g. "sleep"
|
|
||||||
V2 ; -- two-place verb e.g. "love"
|
|
||||||
V3 ; -- three-place verb e.g. "show"
|
|
||||||
VV ; -- verb-phrase-complement verb e.g. "want"
|
|
||||||
VS ; -- sentence-complement verb e.g. "claim"
|
|
||||||
VQ ; -- question-complement verb e.g. "wonder"
|
|
||||||
VA ; -- adjective-complement verb e.g. "look"
|
|
||||||
V2V ; -- verb with NP and V complement e.g. "cause"
|
|
||||||
V2S ; -- verb with NP and S complement e.g. "tell"
|
|
||||||
V2Q ; -- verb with NP and Q complement e.g. "ask"
|
|
||||||
V2A ; -- verb with NP and AP complement e.g. "paint"
|
|
||||||
|
|
||||||
A ; -- one-place adjective e.g. "warm"
|
|
||||||
A2 ; -- two-place adjective e.g. "divisible"
|
|
||||||
|
|
||||||
N ; -- common noun e.g. "house"
|
|
||||||
N2 ; -- relational noun e.g. "son"
|
|
||||||
N3 ; -- three-place relational noun e.g. "connection"
|
|
||||||
PN ; -- proper name e.g. "Paris"
|
|
||||||
|
|
||||||
-- DEPRECATED: QuantSg, QuantPl
|
|
||||||
--- QuantSg ;-- quantifier ('nucleus' of sing. Det) e.g. "every"
|
|
||||||
--- QuantPl ;-- quantifier ('nucleus' of plur. Det) e.g. "many"
|
|
||||||
|
|
||||||
}
|
|
||||||
@@ -1,61 +0,0 @@
|
|||||||
--1 Common: Structures with Common Implementations.
|
|
||||||
|
|
||||||
-- This module defines the categories that uniformly have the linearization
|
|
||||||
-- ${s : Str}$ in all languages.
|
|
||||||
|
|
||||||
-- Moreover, this module defines the abstract parameters of tense, polarity, and
|
|
||||||
-- anteriority, which are used in [``Sentence`` Sentence.html] to generate different
|
|
||||||
-- forms of sentences. Together they give 4 x 2 x 2 = 16 sentence forms.
|
|
||||||
|
|
||||||
-- These tenses are defined for all languages in the library. More tenses
|
|
||||||
-- can be defined in the language extensions, e.g. the "passé simple" of
|
|
||||||
-- Romance languages in [``ExtraRomance`` ../romance/ExtraRomance.gf].
|
|
||||||
|
|
||||||
abstract Common = {
|
|
||||||
|
|
||||||
cat
|
|
||||||
|
|
||||||
--2 Top-level units
|
|
||||||
|
|
||||||
-- Constructed in [``Text`` Text.html]: $Text$.
|
|
||||||
|
|
||||||
Text ; -- text consisting of several phrases e.g. "He is here. Why?"
|
|
||||||
|
|
||||||
-- Constructed in [``Phrase`` Phrase.html]:
|
|
||||||
|
|
||||||
Phr ; -- phrase in a text e.g. "but be quiet please"
|
|
||||||
Utt ; -- sentence, question, word... e.g. "be quiet"
|
|
||||||
Voc ; -- vocative or "please" e.g. "my darling"
|
|
||||||
PConj ; -- phrase-beginning conjunction e.g. "therefore"
|
|
||||||
|
|
||||||
-- Constructed in [``Sentence`` Sentence.html]:
|
|
||||||
|
|
||||||
SC ; -- embedded sentence or question e.g. "that it rains"
|
|
||||||
|
|
||||||
--2 Adverbs
|
|
||||||
|
|
||||||
-- Constructed in [``Adverb`` Adverb.html].
|
|
||||||
-- Many adverbs are constructed in [``Structural`` Structural.html].
|
|
||||||
|
|
||||||
Adv ; -- verb-phrase-modifying adverb e.g. "in the house"
|
|
||||||
AdV ; -- adverb directly attached to verb e.g. "always"
|
|
||||||
AdA ; -- adjective-modifying adverb e.g. "very"
|
|
||||||
AdN ; -- numeral-modifying adverb e.g. "more than"
|
|
||||||
IAdv ; -- interrogative adverb e.g. "why"
|
|
||||||
CAdv ; -- comparative adverb e.g. "more"
|
|
||||||
|
|
||||||
--2 Tense, polarity, and anteriority
|
|
||||||
|
|
||||||
Tense ; -- tense e.g. present, past, future
|
|
||||||
Pol ; -- polarity e.g. positive, negative
|
|
||||||
Ant ; -- anteriority e.g. simultaneous, anterior
|
|
||||||
|
|
||||||
fun
|
|
||||||
PPos, PNeg : Pol ; -- I sleep/don't sleep
|
|
||||||
|
|
||||||
TPres : Tense ;
|
|
||||||
ASimul : Ant ;
|
|
||||||
TPast, TFut, TCond : Tense ; -- I slept/will sleep/would sleep --# notpresent
|
|
||||||
AAnter : Ant ; -- I have slept --# notpresent
|
|
||||||
|
|
||||||
}
|
|
||||||
@@ -1,9 +0,0 @@
|
|||||||
abstract Compatibility = Cat ** {
|
|
||||||
|
|
||||||
-- from Noun 19/4/2008
|
|
||||||
|
|
||||||
fun
|
|
||||||
NumInt : Int -> Num ; -- 57
|
|
||||||
OrdInt : Int -> Ord ; -- 57
|
|
||||||
|
|
||||||
}
|
|
||||||
@@ -1,52 +0,0 @@
|
|||||||
--1 Conjunction: Coordination
|
|
||||||
|
|
||||||
-- Coordination is defined for many different categories; here is
|
|
||||||
-- a sample. The rules apply to *lists* of two or more elements,
|
|
||||||
-- and define two general patterns:
|
|
||||||
-- - ordinary conjunction: X,...,X and X
|
|
||||||
-- - distributed conjunction: both X,...,X and X
|
|
||||||
--
|
|
||||||
--
|
|
||||||
-- $VP$ conjunctions are not covered here, because their applicability
|
|
||||||
-- depends on language. Some special cases are defined in
|
|
||||||
-- [``Extra`` ../abstract/Extra.gf].
|
|
||||||
|
|
||||||
|
|
||||||
abstract Conjunction = Cat ** {
|
|
||||||
|
|
||||||
--2 Rules
|
|
||||||
|
|
||||||
fun
|
|
||||||
ConjS : Conj -> [S] -> S ; -- "he walks and she runs"
|
|
||||||
ConjAP : Conj -> [AP] -> AP ; -- "cold and warm"
|
|
||||||
ConjNP : Conj -> [NP] -> NP ; -- "she or we"
|
|
||||||
ConjAdv : Conj -> [Adv] -> Adv ; -- "here or there"
|
|
||||||
|
|
||||||
---b DConjS : DConj -> [S] -> S ; -- "either he walks or she runs"
|
|
||||||
---b DConjAP : DConj -> [AP] -> AP ; -- "both warm and cold"
|
|
||||||
---b DConjNP : DConj -> [NP] -> NP ; -- "either he or she"
|
|
||||||
---b DConjAdv : DConj -> [Adv] -> Adv; -- "both here and there"
|
|
||||||
|
|
||||||
--2 Categories
|
|
||||||
|
|
||||||
-- These categories are only used in this module.
|
|
||||||
|
|
||||||
cat
|
|
||||||
[S]{2} ;
|
|
||||||
[Adv]{2} ;
|
|
||||||
[NP]{2} ;
|
|
||||||
[AP]{2} ;
|
|
||||||
|
|
||||||
--2 List constructors
|
|
||||||
|
|
||||||
-- The list constructors are derived from the list notation and therefore
|
|
||||||
-- not given explicitly. But here are their type signatures:
|
|
||||||
|
|
||||||
-- BaseC : C -> C -> [C] ; -- for C = S, AP, NP, Adv
|
|
||||||
-- ConsC : C -> [C] -> [C] ;
|
|
||||||
}
|
|
||||||
|
|
||||||
--.
|
|
||||||
-- *Note*. This module uses right-recursive lists. If backward
|
|
||||||
-- compatibility with API 0.9 is needed, use
|
|
||||||
-- [SeqConjunction SeqConjunction.html].
|
|
||||||
@@ -1,30 +0,0 @@
|
|||||||
--1 More syntax rules
|
|
||||||
|
|
||||||
-- This module defines syntax rules that are not implemented in all
|
|
||||||
-- languages, but in more than one, so that it makes sense to offer a
|
|
||||||
-- common API.
|
|
||||||
|
|
||||||
abstract Extra = Cat ** {
|
|
||||||
|
|
||||||
fun
|
|
||||||
GenNP : NP -> Quant ; -- this man's
|
|
||||||
ComplBareVS : VS -> S -> VP ; -- know you go
|
|
||||||
|
|
||||||
StrandRelSlash : RP -> ClSlash -> RCl ; -- that he lives in
|
|
||||||
EmptyRelSlash : RP -> ClSlash -> RCl ; -- he lives in
|
|
||||||
StrandQuestSlash : IP -> ClSlash -> QCl ; -- whom does John live with
|
|
||||||
|
|
||||||
-- $VP$ conjunction, which has different fragments implemented in
|
|
||||||
-- different languages - never a full $VP$, though.
|
|
||||||
|
|
||||||
cat
|
|
||||||
VPI ;
|
|
||||||
[VPI] {2} ;
|
|
||||||
|
|
||||||
fun
|
|
||||||
MkVPI : VP -> VPI ;
|
|
||||||
ConjVPI : Conj -> [VPI] -> VPI ;
|
|
||||||
ComplVPIVV : VV -> VPI -> VP ;
|
|
||||||
|
|
||||||
|
|
||||||
}
|
|
||||||
@@ -1,22 +0,0 @@
|
|||||||
--1 Grammar: the Main Module of the Resource Grammar
|
|
||||||
|
|
||||||
-- This grammar is a collection of the different grammar modules.
|
|
||||||
-- To test the resource, import [``Lang`` Lang.html], which also contains
|
|
||||||
-- a lexicon.
|
|
||||||
|
|
||||||
abstract Grammar =
|
|
||||||
Noun,
|
|
||||||
Verb,
|
|
||||||
Adjective,
|
|
||||||
Adverb,
|
|
||||||
Numeral,
|
|
||||||
Sentence,
|
|
||||||
Question,
|
|
||||||
Relative,
|
|
||||||
Conjunction,
|
|
||||||
Phrase,
|
|
||||||
Text,
|
|
||||||
Structural,
|
|
||||||
Idiom ;
|
|
||||||
|
|
||||||
|
|
||||||
@@ -1,22 +0,0 @@
|
|||||||
--1 Idiom: Idiomatic Expressions
|
|
||||||
|
|
||||||
abstract Idiom = Cat ** {
|
|
||||||
|
|
||||||
-- This module defines constructions that are formed in fixed ways,
|
|
||||||
-- often different even in closely related languages.
|
|
||||||
|
|
||||||
fun
|
|
||||||
ImpersCl : VP -> Cl ; -- it is hot
|
|
||||||
GenericCl : VP -> Cl ; -- one sleeps
|
|
||||||
|
|
||||||
CleftNP : NP -> RS -> Cl ; -- it is I who did it
|
|
||||||
CleftAdv : Adv -> S -> Cl ; -- it is here she slept
|
|
||||||
|
|
||||||
ExistNP : NP -> Cl ; -- there is a house
|
|
||||||
ExistIP : IP -> QCl ; -- which houses are there
|
|
||||||
|
|
||||||
ProgrVP : VP -> VP ; -- be sleeping
|
|
||||||
|
|
||||||
ImpPl1 : VP -> Utt ; -- let's go
|
|
||||||
|
|
||||||
}
|
|
||||||
@@ -1,13 +0,0 @@
|
|||||||
--1 Lang: a Test Module for the Resource Grammar
|
|
||||||
|
|
||||||
-- This grammar is for testing the resource as included in the
|
|
||||||
-- language-independent API, consisting of a grammar and a lexicon.
|
|
||||||
-- The grammar without a lexicon is [``Grammar`` Grammar.html],
|
|
||||||
-- which may be more suitable to open in applications.
|
|
||||||
|
|
||||||
abstract Lang =
|
|
||||||
Grammar,
|
|
||||||
Lexicon
|
|
||||||
** {
|
|
||||||
flags startcat=Phr ;
|
|
||||||
} ;
|
|
||||||
@@ -1,356 +0,0 @@
|
|||||||
abstract Lexicon = Cat ** {
|
|
||||||
fun
|
|
||||||
add_V3 : V3 ;
|
|
||||||
airplane_N : N ;
|
|
||||||
already_Adv : Adv ;
|
|
||||||
animal_N : N ;
|
|
||||||
answer_V2S : V2S ;
|
|
||||||
apartment_N : N ;
|
|
||||||
apple_N : N ;
|
|
||||||
art_N : N ;
|
|
||||||
ashes_N : N ;
|
|
||||||
ask_V2Q : V2Q ;
|
|
||||||
baby_N : N ;
|
|
||||||
back_N : N ;
|
|
||||||
bad_A : A ;
|
|
||||||
bank_N : N ;
|
|
||||||
bark_N : N ;
|
|
||||||
beautiful_A : A ;
|
|
||||||
become_VA : VA ;
|
|
||||||
beer_N : N ;
|
|
||||||
beg_V2V : V2V ;
|
|
||||||
belly_N : N ;
|
|
||||||
big_A : A ;
|
|
||||||
bike_N : N ;
|
|
||||||
bird_N : N ;
|
|
||||||
bite_V2 : V2 ;
|
|
||||||
black_A : A ;
|
|
||||||
blood_N : N ;
|
|
||||||
blow_V : V ;
|
|
||||||
blue_A : A ;
|
|
||||||
boat_N : N ;
|
|
||||||
bone_N : N ;
|
|
||||||
book_N : N ;
|
|
||||||
boot_N : N ;
|
|
||||||
boss_N : N ;
|
|
||||||
boy_N : N ;
|
|
||||||
bread_N : N ;
|
|
||||||
break_V2 : V2 ;
|
|
||||||
breast_N : N ;
|
|
||||||
breathe_V : V ;
|
|
||||||
broad_A : A ;
|
|
||||||
brother_N2 : N2 ;
|
|
||||||
brown_A : A ;
|
|
||||||
burn_V : V ;
|
|
||||||
butter_N : N ;
|
|
||||||
buy_V2 : V2 ;
|
|
||||||
camera_N : N ;
|
|
||||||
cap_N : N ;
|
|
||||||
car_N : N ;
|
|
||||||
carpet_N : N ;
|
|
||||||
cat_N : N ;
|
|
||||||
ceiling_N : N ;
|
|
||||||
chair_N : N ;
|
|
||||||
cheese_N : N ;
|
|
||||||
child_N : N ;
|
|
||||||
church_N : N ;
|
|
||||||
city_N : N ;
|
|
||||||
clean_A : A ;
|
|
||||||
clever_A : A ;
|
|
||||||
close_V2 : V2 ;
|
|
||||||
cloud_N : N ;
|
|
||||||
coat_N : N ;
|
|
||||||
cold_A : A ;
|
|
||||||
come_V : V ;
|
|
||||||
computer_N : N ;
|
|
||||||
correct_A : A ;
|
|
||||||
country_N : N ;
|
|
||||||
count_V2 : V2 ;
|
|
||||||
cousin_N : N ;
|
|
||||||
cow_N : N ;
|
|
||||||
cut_V2 : V2 ;
|
|
||||||
day_N : N ;
|
|
||||||
die_V : V ;
|
|
||||||
dig_V : V ;
|
|
||||||
dirty_A : A ;
|
|
||||||
distance_N3 : N3 ;
|
|
||||||
doctor_N : N ;
|
|
||||||
dog_N : N ;
|
|
||||||
door_N : N ;
|
|
||||||
do_V2 : V2 ;
|
|
||||||
drink_V2 : V2 ;
|
|
||||||
drink_V2 : V2 ;
|
|
||||||
dry_A : A ;
|
|
||||||
dull_A : A ;
|
|
||||||
dust_N : N ;
|
|
||||||
ear_N : N ;
|
|
||||||
earth_N : N ;
|
|
||||||
easy_A2V : A2 ;
|
|
||||||
eat_V2 : V2 ;
|
|
||||||
eat_V2 : V2 ;
|
|
||||||
egg_N : N ;
|
|
||||||
empty_A : A ;
|
|
||||||
enemy_N : N ;
|
|
||||||
eye_N : N ;
|
|
||||||
factory_N : N ;
|
|
||||||
fall_V : V ;
|
|
||||||
far_Adv : Adv ;
|
|
||||||
father_N2 : N2 ;
|
|
||||||
fat_N : N ;
|
|
||||||
fear_VS : VS ;
|
|
||||||
fear_V2 : V2 ;
|
|
||||||
feather_N : N ;
|
|
||||||
fight_V2 : V2 ;
|
|
||||||
find_V2 : V2 ;
|
|
||||||
fingernail_N : N ;
|
|
||||||
fire_N : N ;
|
|
||||||
fish_N : N ;
|
|
||||||
float_V : V ;
|
|
||||||
floor_N : N ;
|
|
||||||
flower_N : N ;
|
|
||||||
flow_V : V ;
|
|
||||||
fly_V : V ;
|
|
||||||
fog_N : N ;
|
|
||||||
foot_N : N ;
|
|
||||||
forest_N : N ;
|
|
||||||
forget_V2 : V2 ;
|
|
||||||
freeze_V : V ;
|
|
||||||
fridge_N : N ;
|
|
||||||
friend_N : N ;
|
|
||||||
fruit_N : N ;
|
|
||||||
full_A : A ;
|
|
||||||
fun
|
|
||||||
fun_AV : A ;
|
|
||||||
garden_N : N ;
|
|
||||||
girl_N : N ;
|
|
||||||
give_V3 : V3 ;
|
|
||||||
glove_N : N ;
|
|
||||||
gold_N : N ;
|
|
||||||
good_A : A ;
|
|
||||||
go_V : V ;
|
|
||||||
grammar_N : N ;
|
|
||||||
grass_N : N ;
|
|
||||||
green_A : A ;
|
|
||||||
guts_N : N ;
|
|
||||||
hair_N : N ;
|
|
||||||
hand_N : N ;
|
|
||||||
harbour_N : N ;
|
|
||||||
hate_V2 : V2 ;
|
|
||||||
hat_N : N ;
|
|
||||||
have_V2 : V2 ;
|
|
||||||
head_N : N ;
|
|
||||||
heart_N : N ;
|
|
||||||
hear_V2 : V2 ;
|
|
||||||
hear_V2 : V2 ;
|
|
||||||
heavy_A : A ;
|
|
||||||
hill_N : N ;
|
|
||||||
hit_V2 : V2 ;
|
|
||||||
hold_V2 : V2 ;
|
|
||||||
hope_VS : VS ;
|
|
||||||
horn_N : N ;
|
|
||||||
horse_N : N ;
|
|
||||||
hot_A : A ;
|
|
||||||
house_N : N ;
|
|
||||||
hunt_V2 : V2 ;
|
|
||||||
husband_N : N ;
|
|
||||||
ice_N : N ;
|
|
||||||
important_A : A ;
|
|
||||||
industry_N : N ;
|
|
||||||
iron_N : N ;
|
|
||||||
john_PN : PN ;
|
|
||||||
jump_V : V ;
|
|
||||||
kill_V2 : V2 ;
|
|
||||||
king_N : N ;
|
|
||||||
knee_N : N ;
|
|
||||||
know_V2 : V2 ;
|
|
||||||
know_V2 : V2 ;
|
|
||||||
lake_N : N ;
|
|
||||||
lamp_N : N ;
|
|
||||||
language_N : N ;
|
|
||||||
laugh_V : V ;
|
|
||||||
leaf_N : N ;
|
|
||||||
learn_V2 : V2 ;
|
|
||||||
leather_N : N ;
|
|
||||||
leave_V2 : V2 ;
|
|
||||||
left_Ord : Ord ;
|
|
||||||
leg_N : N ;
|
|
||||||
lie_V : V ;
|
|
||||||
like_V2 : V2 ;
|
|
||||||
listen_V2 : V2 ;
|
|
||||||
liver_N : N ;
|
|
||||||
live_V : V ;
|
|
||||||
long_A : A ;
|
|
||||||
lose_V2 : V2 ;
|
|
||||||
louse_N : N ;
|
|
||||||
love_N : N ;
|
|
||||||
love_V2 : V2 ;
|
|
||||||
man_N : N ;
|
|
||||||
married_A2 : A2 ;
|
|
||||||
meat_N : N ;
|
|
||||||
milk_N : N ;
|
|
||||||
moon_N : N ;
|
|
||||||
mother_N2 : N2 ;
|
|
||||||
mountain_N : N ;
|
|
||||||
mouth_N : N ;
|
|
||||||
music_N : N ;
|
|
||||||
name_N : N ;
|
|
||||||
narrow_A : A ;
|
|
||||||
near_A : A ;
|
|
||||||
neck_N : N ;
|
|
||||||
new_A : A ;
|
|
||||||
newspaper_N : N ;
|
|
||||||
night_N : N ;
|
|
||||||
nose_N : N ;
|
|
||||||
now_Adv : Adv ;
|
|
||||||
number_N : N ;
|
|
||||||
oil_N : N ;
|
|
||||||
old_A : A ;
|
|
||||||
open_V2 : V2 ;
|
|
||||||
paint_V2A : V2A ;
|
|
||||||
paper_N : N ;
|
|
||||||
paris_PN : PN ;
|
|
||||||
peace_N : N ;
|
|
||||||
pen_N : N ;
|
|
||||||
person_N : N ;
|
|
||||||
planet_N : N ;
|
|
||||||
plastic_N : N ;
|
|
||||||
play_V2 : V2 ;
|
|
||||||
play_V : V ;
|
|
||||||
policeman_N : N ;
|
|
||||||
priest_N : N ;
|
|
||||||
probable_AS : A ;
|
|
||||||
pull_V2 : V2 ;
|
|
||||||
push_V2 : V2 ;
|
|
||||||
put_V2 : V2 ;
|
|
||||||
queen_N : N ;
|
|
||||||
question_N : N ;
|
|
||||||
radio_N : N ;
|
|
||||||
rain_N : N ;
|
|
||||||
rain_V0 : V ;
|
|
||||||
read_V2 : V2 ;
|
|
||||||
ready_A : A ;
|
|
||||||
reason_N : N ;
|
|
||||||
red_A : A ;
|
|
||||||
religion_N : N ;
|
|
||||||
restaurant_N : N ;
|
|
||||||
right_Ord : Ord ;
|
|
||||||
river_N : N ;
|
|
||||||
road_N : N ;
|
|
||||||
rock_N : N ;
|
|
||||||
roof_N : N ;
|
|
||||||
root_N : N ;
|
|
||||||
rope_N : N ;
|
|
||||||
rotten_A : A ;
|
|
||||||
round_A : A ;
|
|
||||||
rubber_N : N ;
|
|
||||||
rub_V2 : V2 ;
|
|
||||||
rule_N : N ;
|
|
||||||
run_V : V ;
|
|
||||||
salt_N : N ;
|
|
||||||
sand_N : N ;
|
|
||||||
say_VS : VS ;
|
|
||||||
school_N : N ;
|
|
||||||
science_N : N ;
|
|
||||||
scratch_V2 : V2 ;
|
|
||||||
sea_N : N ;
|
|
||||||
seed_N : N ;
|
|
||||||
seek_V2 : V2 ;
|
|
||||||
see_V2 : V2 ;
|
|
||||||
see_V2 : V2 ;
|
|
||||||
sell_V3 : V3 ;
|
|
||||||
send_V3 : V3 ;
|
|
||||||
sew_V : V ;
|
|
||||||
sharp_A : A ;
|
|
||||||
sheep_N : N ;
|
|
||||||
ship_N : N ;
|
|
||||||
shirt_N : N ;
|
|
||||||
shoe_N : N ;
|
|
||||||
shop_N : N ;
|
|
||||||
short_A : A ;
|
|
||||||
silver_N : N ;
|
|
||||||
sing_V : V ;
|
|
||||||
sister_N : N ;
|
|
||||||
sit_V : V ;
|
|
||||||
skin_N : N ;
|
|
||||||
sky_N : N ;
|
|
||||||
sleep_V : V ;
|
|
||||||
small_A : A ;
|
|
||||||
smell_V : V ;
|
|
||||||
smoke_N : N ;
|
|
||||||
smooth_A : A ;
|
|
||||||
snake_N : N ;
|
|
||||||
snow_N : N ;
|
|
||||||
sock_N : N ;
|
|
||||||
song_N : N ;
|
|
||||||
speak_V2 : V2 ;
|
|
||||||
spit_V : V ;
|
|
||||||
split_V2 : V2 ;
|
|
||||||
squeeze_V2 : V2 ;
|
|
||||||
stab_V2 : V2 ;
|
|
||||||
stand_V : V ;
|
|
||||||
star_N : N ;
|
|
||||||
steel_N : N ;
|
|
||||||
stick_N : N ;
|
|
||||||
stone_N : N ;
|
|
||||||
stop_V : V ;
|
|
||||||
stove_N : N ;
|
|
||||||
straight_A : A ;
|
|
||||||
student_N : N ;
|
|
||||||
stupid_A : A ;
|
|
||||||
suck_V2 : V2 ;
|
|
||||||
sun_N : N ;
|
|
||||||
swell_V : V ;
|
|
||||||
swim_V : V ;
|
|
||||||
switch8off_V2 : V2 ;
|
|
||||||
switch8on_V2 : V2 ;
|
|
||||||
table_N : N ;
|
|
||||||
tail_N : N ;
|
|
||||||
talk_V3 : V3 ;
|
|
||||||
teacher_N : N ;
|
|
||||||
teach_V2 : V2 ;
|
|
||||||
television_N : N ;
|
|
||||||
thick_A : A ;
|
|
||||||
thin_A : A ;
|
|
||||||
think_V : V ;
|
|
||||||
throw_V2 : V2 ;
|
|
||||||
tie_V2 : V2 ;
|
|
||||||
today_Adv : Adv ;
|
|
||||||
tongue_N : N ;
|
|
||||||
tooth_N : N ;
|
|
||||||
train_N : N ;
|
|
||||||
travel_V : V ;
|
|
||||||
tree_N : N ;
|
|
||||||
turn_V : V ;
|
|
||||||
ugly_A : A ;
|
|
||||||
uncertain_A : A ;
|
|
||||||
understand_V2 : V2 ;
|
|
||||||
university_N : N ;
|
|
||||||
village_N : N ;
|
|
||||||
vomit_V : V ;
|
|
||||||
wait_V2 : V2 ;
|
|
||||||
walk_V : V ;
|
|
||||||
warm_A : A ;
|
|
||||||
war_N : N ;
|
|
||||||
wash_V2 : V2 ;
|
|
||||||
watch_V2 : V2 ;
|
|
||||||
water_N : N ;
|
|
||||||
wet_A : A ;
|
|
||||||
white_A : A ;
|
|
||||||
wide_A : A ;
|
|
||||||
wife_N : N ;
|
|
||||||
wind_N : N ;
|
|
||||||
window_N : N ;
|
|
||||||
wine_N : N ;
|
|
||||||
wing_N : N ;
|
|
||||||
win_V2 : V2 ;
|
|
||||||
wipe_V2 : V2 ;
|
|
||||||
woman_N : N ;
|
|
||||||
wonder_VQ : VQ ;
|
|
||||||
wood_N : N ;
|
|
||||||
worm_N : N ;
|
|
||||||
write_V2 : V2 ;
|
|
||||||
year_N : N ;
|
|
||||||
yellow_A : A ;
|
|
||||||
young_A : A ;
|
|
||||||
|
|
||||||
}
|
|
||||||
@@ -1,144 +0,0 @@
|
|||||||
--1 Noun: Nouns, noun phrases, and determiners
|
|
||||||
|
|
||||||
abstract Noun = Cat ** {
|
|
||||||
|
|
||||||
|
|
||||||
--2 Noun phrases
|
|
||||||
|
|
||||||
-- The three main types of noun phrases are
|
|
||||||
-- - common nouns with determiners
|
|
||||||
-- - proper names
|
|
||||||
-- - pronouns
|
|
||||||
--
|
|
||||||
--
|
|
||||||
fun
|
|
||||||
DetCN : Det -> CN -> NP ; -- the man
|
|
||||||
UsePN : PN -> NP ; -- John
|
|
||||||
UsePron : Pron -> NP ; -- he
|
|
||||||
|
|
||||||
-- Pronouns are defined in the module [``Structural`` Structural.html].
|
|
||||||
|
|
||||||
-- A noun phrase already formed can be modified by a $Predet$erminer.
|
|
||||||
|
|
||||||
PredetNP : Predet -> NP -> NP; -- only the man
|
|
||||||
|
|
||||||
-- A noun phrase can also be postmodified by the past participle of a
|
|
||||||
-- verb, by an adverb, or by a relative clause.
|
|
||||||
|
|
||||||
PPartNP : NP -> V2 -> NP ; -- the man seen
|
|
||||||
AdvNP : NP -> Adv -> NP ; -- Paris today
|
|
||||||
RelNP : NP -> RS -> NP ; -- Paris, which is here
|
|
||||||
|
|
||||||
-- Determiners can form noun phrases directly.
|
|
||||||
|
|
||||||
DetNP : Det -> NP ; -- these five
|
|
||||||
|
|
||||||
|
|
||||||
--2 Determiners
|
|
||||||
|
|
||||||
-- The determiner has a fine-grained structure, in which a 'nucleus'
|
|
||||||
-- quantifier and two optional parts can be discerned: a cardinal and
|
|
||||||
-- an ordinal numeral.
|
|
||||||
|
|
||||||
DetQuantOrd : Quant -> Num -> Ord -> Det ; -- these five best
|
|
||||||
DetQuant : Quant -> Num -> Det ; -- these five
|
|
||||||
|
|
||||||
-- Whether the resulting determiner is singular or plural depends on the
|
|
||||||
-- cardinal.
|
|
||||||
|
|
||||||
-- All parts of the determiner can be empty, except $Quant$, which is
|
|
||||||
-- the "kernel" of a determiner. It is, however, the $Num$ that determines
|
|
||||||
-- the inherent number.
|
|
||||||
|
|
||||||
NumSg : Num ;
|
|
||||||
NumPl : Num ;
|
|
||||||
NumCard : Card -> Num ;
|
|
||||||
|
|
||||||
-- $Card$ consists of either digits or numeral words.
|
|
||||||
|
|
||||||
NumDigits : Digits -> Card ; -- 51
|
|
||||||
NumNumeral : Numeral -> Card ; -- fifty-one
|
|
||||||
|
|
||||||
-- The construction of numerals is defined in [Numeral Numeral.html].
|
|
||||||
|
|
||||||
-- A $Card$ can be modified by certain adverbs.
|
|
||||||
|
|
||||||
AdNum : AdN -> Card -> Card ; -- almost 51
|
|
||||||
|
|
||||||
-- An $Ord$ consists of either digits or numeral words.
|
|
||||||
-- Also superlative forms of adjectives behave syntactically like ordinals.
|
|
||||||
|
|
||||||
OrdDigits : Digits -> Ord ; -- 51st
|
|
||||||
OrdNumeral : Numeral -> Ord ; -- fifty-first
|
|
||||||
OrdSuperl : A -> Ord ; -- warmest
|
|
||||||
|
|
||||||
-- Definite and indefinite noun phrases are sometimes realized as
|
|
||||||
-- neatly distinct words (Spanish "un, unos ; el, los") but also without
|
|
||||||
-- any particular word (Finnish; Swedish definites).
|
|
||||||
|
|
||||||
DetArtOrd : Art -> Num -> Ord -> Det ; -- the (five) best
|
|
||||||
DetArtCard : Art -> Card -> Det ; -- the five
|
|
||||||
|
|
||||||
IndefArt : Art ;
|
|
||||||
DefArt : Art ;
|
|
||||||
|
|
||||||
-- Articles cannot alone form noun phrases, but need a noun.
|
|
||||||
|
|
||||||
DetArtSg : Art -> CN -> NP ; -- the man
|
|
||||||
DetArtPl : Art -> CN -> NP ; -- the men
|
|
||||||
|
|
||||||
-- Nouns can be used without an article as mass nouns. The resource does
|
|
||||||
-- not distinguish mass nouns from other common nouns, which can result
|
|
||||||
-- in semantically odd expressions.
|
|
||||||
|
|
||||||
MassNP : CN -> NP ; -- (beer)
|
|
||||||
|
|
||||||
-- Pronouns have possessive forms. Genitives of other kinds
|
|
||||||
-- of noun phrases are not given here, since they are not possible
|
|
||||||
-- in e.g. Romance languages. They can be found in $Extra$ modules.
|
|
||||||
|
|
||||||
PossPron : Pron -> Quant ; -- my (house)
|
|
||||||
|
|
||||||
-- Other determiners are defined in [Structural Structural.html].
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
--2 Common nouns
|
|
||||||
|
|
||||||
-- Simple nouns can be used as nouns outright.
|
|
||||||
|
|
||||||
UseN : N -> CN ; -- house
|
|
||||||
|
|
||||||
-- Relational nouns take one or two arguments.
|
|
||||||
|
|
||||||
ComplN2 : N2 -> NP -> CN ; -- mother of the king
|
|
||||||
ComplN3 : N3 -> NP -> N2 ; -- distance from this city (to Paris)
|
|
||||||
|
|
||||||
-- Relational nouns can also be used without their arguments.
|
|
||||||
-- The semantics is typically derivative of the relational meaning.
|
|
||||||
|
|
||||||
UseN2 : N2 -> CN ; -- mother
|
|
||||||
Use2N3 : N3 -> N2 ; -- distance (from this city)
|
|
||||||
Use3N3 : N3 -> N2 ; -- distance (to Paris)
|
|
||||||
|
|
||||||
-- Nouns can be modified by adjectives, relative clauses, and adverbs
|
|
||||||
-- (the last rule will give rise to many 'PP attachment' ambiguities
|
|
||||||
-- when used in connection with verb phrases).
|
|
||||||
|
|
||||||
AdjCN : AP -> CN -> CN ; -- big house
|
|
||||||
RelCN : CN -> RS -> CN ; -- house that John bought
|
|
||||||
AdvCN : CN -> Adv -> CN ; -- house on the hill
|
|
||||||
|
|
||||||
-- Nouns can also be modified by embedded sentences and questions.
|
|
||||||
-- For some nouns this makes little sense, but we leave this for applications
|
|
||||||
-- to decide. Sentential complements are defined in [Verb Verb.html].
|
|
||||||
|
|
||||||
SentCN : CN -> SC -> CN ; -- question where she sleeps
|
|
||||||
|
|
||||||
--2 Apposition
|
|
||||||
|
|
||||||
-- This is certainly overgenerating.
|
|
||||||
|
|
||||||
ApposCN : CN -> NP -> CN ; -- city Paris (, numbers x and y)
|
|
||||||
|
|
||||||
} ;
|
|
||||||
@@ -1,60 +0,0 @@
|
|||||||
--1 Numerals
|
|
||||||
|
|
||||||
-- This grammar defines numerals from 1 to 999999.
|
|
||||||
-- The implementations are adapted from the
|
|
||||||
-- [numerals library http://www.cs.chalmers.se/~aarne/GF/examples/numerals/]
|
|
||||||
-- which defines numerals for 88 languages.
|
|
||||||
-- The resource grammar implementations add to this inflection (if needed)
|
|
||||||
-- and ordinal numbers.
|
|
||||||
--
|
|
||||||
-- *Note* 1. Number 1 as defined
|
|
||||||
-- in the category $Numeral$ here should not be used in the formation of
|
|
||||||
-- noun phrases, and should therefore be removed. Instead, one should use
|
|
||||||
-- [Structural Structural.html]$.one_Quant$. This makes the grammar simpler
|
|
||||||
-- because we can assume that numbers form plural noun phrases.
|
|
||||||
--
|
|
||||||
-- *Note* 2. The implementations introduce spaces between
|
|
||||||
-- parts of a numeral, which is often incorrect - more work on
|
|
||||||
-- (un)lexing is needed to solve this problem.
|
|
||||||
|
|
||||||
abstract Numeral = Cat ** {
|
|
||||||
|
|
||||||
cat
|
|
||||||
Digit ; -- 2..9
|
|
||||||
Sub10 ; -- 1..9
|
|
||||||
Sub100 ; -- 1..99
|
|
||||||
Sub1000 ; -- 1..999
|
|
||||||
Sub1000000 ; -- 1..999999
|
|
||||||
|
|
||||||
fun
|
|
||||||
num : Sub1000000 -> Numeral ;
|
|
||||||
|
|
||||||
n2, n3, n4, n5, n6, n7, n8, n9 : Digit ;
|
|
||||||
|
|
||||||
pot01 : Sub10 ; -- 1
|
|
||||||
pot0 : Digit -> Sub10 ; -- d * 1
|
|
||||||
pot110 : Sub100 ; -- 10
|
|
||||||
pot111 : Sub100 ; -- 11
|
|
||||||
pot1to19 : Digit -> Sub100 ; -- 10 + d
|
|
||||||
pot0as1 : Sub10 -> Sub100 ; -- coercion of 1..9
|
|
||||||
pot1 : Digit -> Sub100 ; -- d * 10
|
|
||||||
pot1plus : Digit -> Sub10 -> Sub100 ; -- d * 10 + n
|
|
||||||
pot1as2 : Sub100 -> Sub1000 ; -- coercion of 1..99
|
|
||||||
pot2 : Sub10 -> Sub1000 ; -- m * 100
|
|
||||||
pot2plus : Sub10 -> Sub100 -> Sub1000 ; -- m * 100 + n
|
|
||||||
pot2as3 : Sub1000 -> Sub1000000 ; -- coercion of 1..999
|
|
||||||
pot3 : Sub1000 -> Sub1000000 ; -- m * 1000
|
|
||||||
pot3plus : Sub1000 -> Sub1000 -> Sub1000000 ; -- m * 1000 + n
|
|
||||||
|
|
||||||
-- Numerals as sequences of digits have a separate, simpler grammar
|
|
||||||
|
|
||||||
cat
|
|
||||||
Dig ; -- single digit 0..9
|
|
||||||
|
|
||||||
fun
|
|
||||||
IDig : Dig -> Digits ; -- 8
|
|
||||||
IIDig : Dig -> Digits -> Digits ; -- 876
|
|
||||||
|
|
||||||
D_0, D_1, D_2, D_3, D_4, D_5, D_6, D_7, D_8, D_9 : Dig ;
|
|
||||||
|
|
||||||
}
|
|
||||||
@@ -1,44 +0,0 @@
|
|||||||
--1 Phrase: Phrases and Utterances
|
|
||||||
|
|
||||||
abstract Phrase = Cat ** {
|
|
||||||
|
|
||||||
-- When a phrase is built from an utterance it can be prefixed
|
|
||||||
-- with a phrasal conjunction (such as "but", "therefore")
|
|
||||||
-- and suffixed with a vocative (typically a noun phrase).
|
|
||||||
|
|
||||||
fun
|
|
||||||
PhrUtt : PConj -> Utt -> Voc -> Phr ; -- but come here, my friend
|
|
||||||
|
|
||||||
-- Utterances are formed from sentences, questions, and imperatives.
|
|
||||||
|
|
||||||
UttS : S -> Utt ; -- John walks
|
|
||||||
UttQS : QS -> Utt ; -- is it good
|
|
||||||
UttImpSg : Pol -> Imp -> Utt; -- (don't) love yourself
|
|
||||||
UttImpPl : Pol -> Imp -> Utt; -- (don't) love yourselves
|
|
||||||
UttImpPol : Pol -> Imp -> Utt ; -- (don't) sleep (polite)
|
|
||||||
|
|
||||||
-- There are also 'one-word utterances'. A typical use of them is
|
|
||||||
-- as answers to questions.
|
|
||||||
-- *Note*. This list is incomplete. More categories could be covered.
|
|
||||||
-- Moreover, in many languages e.g. noun phrases in different cases
|
|
||||||
-- can be used.
|
|
||||||
|
|
||||||
UttIP : IP -> Utt ; -- who
|
|
||||||
UttIAdv : IAdv -> Utt ; -- why
|
|
||||||
UttNP : NP -> Utt ; -- this man
|
|
||||||
UttAdv : Adv -> Utt ; -- here
|
|
||||||
UttVP : VP -> Utt ; -- to sleep
|
|
||||||
|
|
||||||
-- The phrasal conjunction is optional. A sentence conjunction
|
|
||||||
-- can also be used to prefix an utterance.
|
|
||||||
|
|
||||||
NoPConj : PConj ;
|
|
||||||
PConjConj : Conj -> PConj ; -- and
|
|
||||||
|
|
||||||
-- The vocative is optional. Any noun phrase can be made into vocative,
|
|
||||||
-- which may be overgenerating (e.g. "I").
|
|
||||||
|
|
||||||
NoVoc : Voc ;
|
|
||||||
VocNP : NP -> Voc ; -- my friend
|
|
||||||
|
|
||||||
}
|
|
||||||
@@ -1,41 +0,0 @@
|
|||||||
--1 Question: Questions and Interrogative Pronouns
|
|
||||||
|
|
||||||
abstract Question = Cat ** {
|
|
||||||
|
|
||||||
-- A question can be formed from a clause ('yes-no question') or
|
|
||||||
-- with an interrogative.
|
|
||||||
|
|
||||||
fun
|
|
||||||
QuestCl : Cl -> QCl ; -- does John walk
|
|
||||||
QuestVP : IP -> VP -> QCl ; -- who walks
|
|
||||||
QuestSlash : IP -> ClSlash -> QCl ; -- whom does John love
|
|
||||||
QuestIAdv : IAdv -> Cl -> QCl ; -- why does John walk
|
|
||||||
QuestIComp : IComp -> NP -> QCl ; -- where is John
|
|
||||||
|
|
||||||
-- Interrogative pronouns can be formed with interrogative
|
|
||||||
-- determiners, with or without a noun.
|
|
||||||
|
|
||||||
IdetCN : IDet -> CN -> IP ; -- which five songs
|
|
||||||
IdetIP : IDet -> IP ; -- which five
|
|
||||||
|
|
||||||
-- They can be modified with adverbs.
|
|
||||||
|
|
||||||
AdvIP : IP -> Adv -> IP ; -- who in Paris
|
|
||||||
|
|
||||||
-- Interrogative quantifiers have number forms and can take number modifiers.
|
|
||||||
|
|
||||||
IdetQuant : IQuant -> Num -> IDet ; -- which (five)
|
|
||||||
|
|
||||||
-- Interrogative adverbs can be formed prepositionally.
|
|
||||||
|
|
||||||
PrepIP : Prep -> IP -> IAdv ; -- with whom
|
|
||||||
|
|
||||||
-- Interrogative complements to copulas can be both adverbs and
|
|
||||||
-- pronouns.
|
|
||||||
|
|
||||||
CompIAdv : IAdv -> IComp ; -- where (is it)
|
|
||||||
CompIP : IP -> IComp ; -- who (is it)
|
|
||||||
|
|
||||||
-- More $IP$, $IDet$, and $IAdv$ are defined in $Structural$.
|
|
||||||
|
|
||||||
}
|
|
||||||
@@ -1,26 +0,0 @@
|
|||||||
--1 Relative clauses and pronouns
|
|
||||||
|
|
||||||
abstract Relative = Cat ** {
|
|
||||||
|
|
||||||
fun
|
|
||||||
|
|
||||||
-- The simplest way to form a relative clause is from a clause by
|
|
||||||
-- a pronoun similar to "such that".
|
|
||||||
|
|
||||||
RelCl : Cl -> RCl ; -- such that John loves her
|
|
||||||
|
|
||||||
-- The more proper ways are from a verb phrase
|
|
||||||
-- (formed in [``Verb`` Verb.html]) or a sentence
|
|
||||||
-- with a missing noun phrase (formed in [``Sentence`` Sentence.html]).
|
|
||||||
|
|
||||||
RelVP : RP -> VP -> RCl ; -- who loves John
|
|
||||||
RelSlash : RP -> ClSlash -> RCl ; -- whom John loves
|
|
||||||
|
|
||||||
-- Relative pronouns are formed from an 'identity element' by prefixing
|
|
||||||
-- or suffixing (depending on language) prepositional phrases.
|
|
||||||
|
|
||||||
IdRP : RP ; -- which
|
|
||||||
FunRP : Prep -> NP -> RP -> RP ; -- the mother of whom
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
@@ -1,95 +0,0 @@
|
|||||||
--1 Sentence: Sentences, Clauses, and Imperatives
|
|
||||||
|
|
||||||
abstract Sentence = Cat ** {
|
|
||||||
|
|
||||||
--2 Clauses
|
|
||||||
|
|
||||||
-- The $NP VP$ predication rule forms a clause whose linearization
|
|
||||||
-- gives a table of all tense variants, positive and negative.
|
|
||||||
-- Clauses are converted to $S$ (with fixed tense) with the
|
|
||||||
-- $UseCl$ function below.
|
|
||||||
|
|
||||||
fun
|
|
||||||
PredVP : NP -> VP -> Cl ; -- John walks
|
|
||||||
|
|
||||||
-- Using an embedded sentence as a subject is treated separately.
|
|
||||||
-- This can be overgenerating. E.g. "whether you go" as subject
|
|
||||||
-- is only meaningful for some verb phrases.
|
|
||||||
|
|
||||||
PredSCVP : SC -> VP -> Cl ; -- that she goes is good
|
|
||||||
|
|
||||||
--2 Clauses missing object noun phrases
|
|
||||||
|
|
||||||
-- This category is a variant of the 'slash category' $S/NP$ of
|
|
||||||
-- GPSG and categorial grammars, which in turn replaces
|
|
||||||
-- movement transformations in the formation of questions
|
|
||||||
-- and relative clauses. Except $SlashV2$, the construction
|
|
||||||
-- rules can be seen as special cases of function composition, in
|
|
||||||
-- the style of CCG.
|
|
||||||
-- *Note* the set is not complete and lacks e.g. verbs with more than 2 places.
|
|
||||||
|
|
||||||
SlashVP : NP -> VPSlash -> ClSlash ; -- (whom) he sees
|
|
||||||
AdvSlash : ClSlash -> Adv -> ClSlash ; -- (whom) he sees today
|
|
||||||
SlashPrep : Cl -> Prep -> ClSlash ; -- (with whom) he walks
|
|
||||||
SlashVS : NP -> VS -> SSlash -> ClSlash ; -- (whom) she says that he loves
|
|
||||||
|
|
||||||
--2 Imperatives
|
|
||||||
|
|
||||||
-- An imperative is straightforwardly formed from a verb phrase.
|
|
||||||
-- It has variation over positive and negative, singular and plural.
|
|
||||||
-- To fix these parameters, see [Phrase Phrase.html].
|
|
||||||
|
|
||||||
ImpVP : VP -> Imp ; -- love yourselves
|
|
||||||
|
|
||||||
--2 Embedded sentences
|
|
||||||
|
|
||||||
-- Sentences, questions, and infinitival phrases can be used as
|
|
||||||
-- subjects and (adverbial) complements.
|
|
||||||
|
|
||||||
EmbedS : S -> SC ; -- that she goes
|
|
||||||
EmbedQS : QS -> SC ; -- who goes
|
|
||||||
EmbedVP : VP -> SC ; -- to go
|
|
||||||
|
|
||||||
--2 Sentences
|
|
||||||
|
|
||||||
-- These are the 4 x 2 x 2 = 16 forms generated by different
|
|
||||||
-- combinations of tense, polarity, and
|
|
||||||
-- anteriority, which are defined in [``Common`` Common.html].
|
|
||||||
|
|
||||||
fun
|
|
||||||
UseCl : Tense -> Ant -> Pol -> Cl -> S ;
|
|
||||||
UseQCl : Tense -> Ant -> Pol -> QCl -> QS ;
|
|
||||||
UseRCl : Tense -> Ant -> Pol -> RCl -> RS ;
|
|
||||||
UseSlash : Tense -> Ant -> Pol -> ClSlash -> SSlash ;
|
|
||||||
|
|
||||||
-- An adverb can be added to the beginning of a sentence.
|
|
||||||
|
|
||||||
AdvS : Adv -> S -> S ; -- today, I will go home
|
|
||||||
|
|
||||||
-- A sentence can be modified by a relative clause referring to its contents.
|
|
||||||
|
|
||||||
RelS : S -> RS -> S ; -- she sleeps, which is good
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
--.
|
|
||||||
|
|
||||||
-- Examples for English $S$/$Cl$:
|
|
||||||
{-
|
|
||||||
Pres Simul Pos ODir : he sleeps
|
|
||||||
Pres Simul Neg ODir : he doesn't sleep
|
|
||||||
Pres Anter Pos ODir : he has slept
|
|
||||||
Pres Anter Neg ODir : he hasn't slept
|
|
||||||
Past Simul Pos ODir : he slept
|
|
||||||
Past Simul Neg ODir : he didn't sleep
|
|
||||||
Past Anter Pos ODir : he had slept
|
|
||||||
Past Anter Neg ODir : he hadn't slept
|
|
||||||
Fut Simul Pos ODir : he will sleep
|
|
||||||
Fut Simul Neg ODir : he won't sleep
|
|
||||||
Fut Anter Pos ODir : he will have slept
|
|
||||||
Fut Anter Neg ODir : he won't have slept
|
|
||||||
Cond Simul Pos ODir : he would sleep
|
|
||||||
Cond Simul Neg ODir : he wouldn't sleep
|
|
||||||
Cond Anter Pos ODir : he would have slept
|
|
||||||
Cond Anter Neg ODir : he wouldn't have slept
|
|
||||||
-}
|
|
||||||
@@ -1,111 +0,0 @@
|
|||||||
--1 Structural: Structural Words
|
|
||||||
--
|
|
||||||
-- Here we have some words belonging to closed classes and appearing
|
|
||||||
-- in all languages we have considered.
|
|
||||||
-- Sometimes more distinctions are needed, e.g. $we_Pron$ in Spanish
|
|
||||||
-- should be replaced by masculine and feminine variants, found in
|
|
||||||
-- [``ExtraSpa`` ../spanish/ExtraSpa.gf].
|
|
||||||
|
|
||||||
abstract Structural = Cat ** {
|
|
||||||
|
|
||||||
fun
|
|
||||||
|
|
||||||
-- This is a (roughly) alphabetical list of structural words.
|
|
||||||
|
|
||||||
above_Prep : Prep ;
|
|
||||||
after_Prep : Prep ;
|
|
||||||
all_Predet : Predet ;
|
|
||||||
almost_AdA : AdA ;
|
|
||||||
almost_AdN : AdN ;
|
|
||||||
although_Subj : Subj ;
|
|
||||||
always_AdV : AdV ;
|
|
||||||
and_Conj : Conj ;
|
|
||||||
because_Subj : Subj ;
|
|
||||||
before_Prep : Prep ;
|
|
||||||
behind_Prep : Prep ;
|
|
||||||
between_Prep : Prep ;
|
|
||||||
both7and_DConj : Conj ; -- both...and
|
|
||||||
---b both7and_DConj : DConj ;
|
|
||||||
but_PConj : PConj ;
|
|
||||||
by8agent_Prep : Prep ; -- by (agent)
|
|
||||||
by8means_Prep : Prep ; -- by (means of)
|
|
||||||
can8know_VV : VV ; -- can (capacity)
|
|
||||||
can_VV : VV ; -- can (possibility)
|
|
||||||
during_Prep : Prep ;
|
|
||||||
either7or_DConj : Conj ; -- either...or
|
|
||||||
---b either7or_DConj : DConj ;
|
|
||||||
every_Det : Det ;
|
|
||||||
everybody_NP : NP ; -- everybody
|
|
||||||
everything_NP : NP ;
|
|
||||||
everywhere_Adv : Adv ;
|
|
||||||
--- first_Ord : Ord ; DEPRECATED
|
|
||||||
few_Det : Det ;
|
|
||||||
for_Prep : Prep ;
|
|
||||||
from_Prep : Prep ;
|
|
||||||
he_Pron : Pron ;
|
|
||||||
here_Adv : Adv ;
|
|
||||||
here7to_Adv : Adv ; -- to here
|
|
||||||
here7from_Adv : Adv ; -- from here
|
|
||||||
how_IAdv : IAdv ;
|
|
||||||
how8many_IDet : IDet ; -- how many
|
|
||||||
i_Pron : Pron ;
|
|
||||||
if_Subj : Subj ;
|
|
||||||
in8front_Prep : Prep ; -- in front of
|
|
||||||
in_Prep : Prep ;
|
|
||||||
it_Pron : Pron ;
|
|
||||||
less_CAdv : CAdv ;
|
|
||||||
many_Det : Det ;
|
|
||||||
more_CAdv : CAdv ;
|
|
||||||
most_Predet : Predet ;
|
|
||||||
much_Det : Det ;
|
|
||||||
must_VV : VV ;
|
|
||||||
---b no_Phr : Phr ;
|
|
||||||
no_Utt : Utt ;
|
|
||||||
on_Prep : Prep ;
|
|
||||||
--- one_Quant : QuantSg ; DEPRECATED
|
|
||||||
only_Predet : Predet ;
|
|
||||||
or_Conj : Conj ;
|
|
||||||
otherwise_PConj : PConj ;
|
|
||||||
part_Prep : Prep ;
|
|
||||||
please_Voc : Voc ;
|
|
||||||
possess_Prep : Prep ; -- of (possessive)
|
|
||||||
quite_Adv : AdA ; -- NOTE(review): name ends in _Adv but the type is AdA; confirm this is intended
|
|
||||||
she_Pron : Pron ;
|
|
||||||
so_AdA : AdA ;
|
|
||||||
someSg_Det : Det ; -- some (singular)
|
|
||||||
somePl_Det : Det ; -- some (plural)
|
|
||||||
somebody_NP : NP ;
|
|
||||||
something_NP : NP ;
|
|
||||||
somewhere_Adv : Adv ;
|
|
||||||
that_Quant : Quant ;
|
|
||||||
there_Adv : Adv ;
|
|
||||||
there7to_Adv : Adv ; -- to there
|
|
||||||
there7from_Adv : Adv ; -- from there
|
|
||||||
therefore_PConj : PConj ;
|
|
||||||
they_Pron : Pron ;
|
|
||||||
this_Quant : Quant ;
|
|
||||||
through_Prep : Prep ;
|
|
||||||
to_Prep : Prep ;
|
|
||||||
too_AdA : AdA ;
|
|
||||||
under_Prep : Prep ;
|
|
||||||
very_AdA : AdA ;
|
|
||||||
want_VV : VV ;
|
|
||||||
we_Pron : Pron ;
|
|
||||||
whatPl_IP : IP ; -- what (plural)
|
|
||||||
whatSg_IP : IP ; -- what (singular)
|
|
||||||
when_IAdv : IAdv ;
|
|
||||||
when_Subj : Subj ;
|
|
||||||
where_IAdv : IAdv ;
|
|
||||||
which_IQuant : IQuant ;
|
|
||||||
whoPl_IP : IP ; -- who (plural)
|
|
||||||
whoSg_IP : IP ; -- who (singular)
|
|
||||||
why_IAdv : IAdv ;
|
|
||||||
with_Prep : Prep ;
|
|
||||||
without_Prep : Prep ;
|
|
||||||
---b yes_Phr : Phr ;
|
|
||||||
yes_Utt : Utt ;
|
|
||||||
youSg_Pron : Pron ; -- you (singular)
|
|
||||||
youPl_Pron : Pron ; -- you (plural)
|
|
||||||
youPol_Pron : Pron ; -- you (polite)
|
|
||||||
|
|
||||||
}
|
|
||||||
@@ -1,15 +0,0 @@
|
|||||||
--1 Text: Texts
|
|
||||||
|
|
||||||
-- Texts are built from an empty text by adding $Phr$ases,
|
|
||||||
-- using as constructors the punctuation marks ".", "?", and "!".
|
|
||||||
-- Any punctuation mark can be attached to any kind of phrase.
|
|
||||||
|
|
||||||
abstract Text = Common ** {
|
|
||||||
|
|
||||||
fun
|
|
||||||
TEmpty : Text ; -- (the empty text)
|
|
||||||
TFullStop : Phr -> Text -> Text ; -- John walks. ...
|
|
||||||
TQuestMark : Phr -> Text -> Text ; -- Are they here? ...
|
|
||||||
TExclMark : Phr -> Text -> Text ; -- Let's go! ...
|
|
||||||
|
|
||||||
}
|
|
||||||
@@ -1,71 +0,0 @@
|
|||||||
--1 The construction of verb phrases
|
|
||||||
|
|
||||||
abstract Verb = Cat ** {
|
|
||||||
|
|
||||||
--2 Complementization rules
|
|
||||||
|
|
||||||
-- Verb phrases are constructed from verbs by providing their
|
|
||||||
-- complements. There is one rule for each verb category.
|
|
||||||
|
|
||||||
fun
|
|
||||||
UseV : V -> VP ; -- sleep
|
|
||||||
|
|
||||||
ComplVV : VV -> VP -> VP ; -- want to run
|
|
||||||
ComplVS : VS -> S -> VP ; -- say that she runs
|
|
||||||
ComplVQ : VQ -> QS -> VP ; -- wonder who runs
|
|
||||||
ComplVA : VA -> AP -> VP ; -- become red
|
|
||||||
|
|
||||||
SlashV2a : V2 -> VPSlash ; -- love (it)
|
|
||||||
Slash2V3 : V3 -> NP -> VPSlash ; -- give it (to her)
|
|
||||||
Slash3V3 : V3 -> NP -> VPSlash ; -- give (it) to her
|
|
||||||
|
|
||||||
SlashV2V : V2V -> VP -> VPSlash ; -- beg (her) to go
|
|
||||||
SlashV2S : V2S -> S -> VPSlash ; -- answer (to him) that it is good
|
|
||||||
SlashV2Q : V2Q -> QS -> VPSlash ; -- ask (him) who came
|
|
||||||
SlashV2A : V2A -> AP -> VPSlash ; -- paint (it) red
|
|
||||||
|
|
||||||
ComplSlash : VPSlash -> NP -> VP ; -- love it
|
|
||||||
|
|
||||||
SlashVV : VV -> VPSlash -> VPSlash ; -- want to buy
|
|
||||||
SlashV2VNP : V2V -> NP -> VPSlash -> VPSlash ; -- beg me to buy
|
|
||||||
|
|
||||||
--2 Other ways of forming verb phrases
|
|
||||||
|
|
||||||
-- Verb phrases can also be constructed reflexively and from
|
|
||||||
-- copula-preceded complements.
|
|
||||||
|
|
||||||
ReflVP : VPSlash -> VP ; -- love himself
|
|
||||||
UseComp : Comp -> VP ; -- be warm
|
|
||||||
|
|
||||||
-- Passivization of two-place verbs is another way to use
|
|
||||||
-- them. In many languages, the result is a participle that
|
|
||||||
-- is used as complement to a copula ("is used"), but other
|
|
||||||
-- auxiliary verbs are possible (Ger. "wird angewendet", It.
|
|
||||||
-- "viene usato"), as well as special verb forms (Fin. "käytetään",
|
|
||||||
-- Swe. "används").
|
|
||||||
--
|
|
||||||
-- *Note*. The rule can overgenerate, since the $V2$ need not
|
|
||||||
-- take a direct object.
|
|
||||||
|
|
||||||
PassV2 : V2 -> VP ; -- be loved
|
|
||||||
|
|
||||||
-- Adverbs can be added to verb phrases. Many languages make
|
|
||||||
-- a distinction between adverbs that are attached at the end
|
|
||||||
-- vs. next to (or before) the verb.
|
|
||||||
|
|
||||||
AdvVP : VP -> Adv -> VP ; -- sleep here
|
|
||||||
AdVVP : AdV -> VP -> VP ; -- always sleep
|
|
||||||
|
|
||||||
-- *Agents of passives* are constructed as adverbs with the
|
|
||||||
-- preposition [Structural Structural.html]$.by8agent_Prep$.
|
|
||||||
|
|
||||||
|
|
||||||
--2 Complements to copula
|
|
||||||
|
|
||||||
-- Adjectival phrases, noun phrases, and adverbs can be used.
|
|
||||||
|
|
||||||
CompAP : AP -> Comp ; -- (be) small
|
|
||||||
CompNP : NP -> Comp ; -- (be) a man
|
|
||||||
CompAdv : Adv -> Comp ; -- (be) here
|
|
||||||
|
|
||||||
}
|
|
||||||