for release meeting

2026-05-26 11:18:55 -06:00 · 2004-06-21 08:53:58 +00:00
parent a134a1fd65
commit b248e6e25e
5 changed files with 570 additions and 9 deletions
--- a/doc/release2.html
+++ b/doc/release2.html
@@ -0,0 +1,546 @@
+<html>
+
+<body bgcolor="#FFFFFF" text="#000000">
+
+<center>
+
+<h1>Grammatical Framework Version 2</h1>
+
+Release of Version 2.0
+
+<p>
+
+Planned: 24 June 2004
+
+<p>
+
+<a href="http://www.cs.chalmers.se/~aarne">Aarne Ranta</a>
+
+</center>
+
+
+<!-- NEW -->
+
+<h2>Highlights</h2>
+
+Module system.
+
+<p>
+
+Separate compilation to canonical GF.
+
+<p>
+
+Improved GUI.
+
+<p>
+
+Improved parser generation.
+
+<p>
+
+Improved shell (new commands and options, help, error messages).
+
+<p>
+
+Accurate <a href="DocGF.pdf">language specification</a>
+(also of GFC).
+
+<p>
+
+Extended resource library.
+
+<p>
+
+Extended Numerals library.
+
+
+<!-- NEW -->
+
+
+
+<h2>Module system</h2>
+
+<li> Separate modules for <tt>abstract</tt>, 
+     <tt>concrete</tt>, and <tt>resource</tt>.
+<li> Replaces the file-based <tt>include</tt> system
+<li> Name space handling with qualified names
+<li> Hierarchic structure (single inheritance <tt>**</tt>) + 
+     cross-cutting reuse (<tt>open</tt>)
+<li> Separate compilation, one module per file
+<li> Reuse of <tt>abstract</tt>+<tt>concrete</tt> as <tt>resource</tt>
+<li> Parametrized modules:
+     <tt>interface</tt>, <tt>instance</tt>, <tt>incomplete</tt>.
+<li> New experimental module types: <tt>transfer</tt>, 
+     <tt>union</tt>.
+
+
+<!-- NEW -->
+
+<h4>Canonical format GFC</h4>
+
+<li> The target of GF compiler; to reuse, just read in.
+
+<li> Readable by Haskell/Java/C++/C applications (by BNFC generated parsers).
+
+
+
+<!-- NEW -->
+
+<h4>New features in expression language</h4>
+
+In addition to the module system:
+
+<p>
+
+<li> Disjunctive patterns <tt>P | ... | Q</tt>.
+<li> String patterns <tt>"foo"</tt>.
+<li> (?) Integer patterns <tt>74</tt>.
+<li> Binding token <tt>&+</tt> to glue separate tokens at unlexing phase, 
+     and unlexer to resolve this.
+<li> New syntax alternatives for local definitions: <tt>let</tt> without
+     braces and <tt>where</tt>.
+<li> Pattern variables can be used on lhs's of <tt>oper</tt> definitions.
+<li> New Unicode transliterations (by Harald Hammarström).
+
+
+<!-- NEW -->
+
+<h4>New shell commands and command functionalities</h4>
+
+<li> <tt>pi</tt> = <tt>print_info</tt>: information on an identifier in scope.
+<li> <tt>h</tt> = <tt>help</tt> now in long or short form, 
+     and on individual commands.
+<li> <tt>gt</tt> = <tt>generate_trees</tt>: all trees of a given
+     category or instantiations of a given incomplete term, up to a
+     given depth.
+<li> <tt>gr</tt> = <tt>generate_random</tt> can now be given
+     an incomplete term as an argument, to constrain generation.
+<li> <tt>so</tt> = <tt>show_opers</tt> shows all <tt>oper</tt>
+     operations with a given value type.
+<li> <tt>pm</tt> = <tt>print_multi</tt> prints the multilingual
+     grammar resident in the current state to a ready-compiled
+     <tt>.gfcm</tt> file.
+<li> All commands have both long and short names (see help). Short
+     names are easier to type, whereas long names
+     make scripts more readable.
+<li> Meaningless command options generate warnings.
+
+
+<!-- NEW -->
+
+<h4>New editor features</h4>
+
+<li> Active text field: click the middle button in the focus to send
+     in refinement through the parser.
+<li> Clipboard: copy complex terms into the refine menu.
+<li> Two-step refinements generated by the "Generate" operation.
+
+<!-- NEW -->
+
+<h4>Improved implementation</h4>
+
+<li> Haskell source code is organized into subdirectories.
+<li> BNF Converter is used for defining the languages GF and GFC, which also
+     give reliable LaTeX documentation.
+<li> Lexical rules sorted out by option <tt>-cflexer</tt> for efficient
+     parsing with large lexica.     
+<li> GHC optimizations and strictness flags are used for improving performance.
+
+
+<!-- NEW -->
+
+<h4>New parser (work in progress)</h4>
+
+<li> By Peter Ljunglöf, based on MCFG.
+<li> Much more efficient for morphology and discontinuous constituents.
+<li> Treatment of cyclic rules.
+<li> Currently lots of alternative parsers via flags <tt>-parser=newX</tt>.
+
+
+<!-- NEW -->
+
+<h2>Status (21/6/2004)</h2>
+
+Grammar compiler, editor GUIs, and shell work for all platforms
+(with restrictions for Solaris).
+
+<p>
+
+The updated <tt>HelpFile</tt> (accessible through <tt>h</tt> command)
+marks unsupported features present in GF 1.2 with <tt>*</tt>.
+They will be supported again if interested users appear.
+
+<p>
+
+GF1 grammars can be automatically translated to GF2 (although the
+result is not as good
+as manual, since indentation and comments are destroyed). The results can be 
+saved in GF2 files, but this is not necessary. 
+Some rarely used GF1 features are no longer supported (see next section).
+
+<p>
+
+It is also possible to write a GF2 grammar back to GF1, with the
+command <tt>pg -printer=old</tt>.
+
+
+<!-- NEW -->
+
+Resource libraries 
+and some example grammars have been
+converted. Most old example grammars work without any changes.
+There is a new resource API with
+many new constructions.
+
+<p>
+
+A make facility works, finding out which modules have to be recompiled.
+
+<p>
+
+Soundness checking of module dependencies and completeness is not
+complete. This means that some errors may show up too late.
+
+<p>
+
+The environment variable <tt>GF_LIB_PATH</tt> needs some more work.
+
+<p>
+
+LaTeX and XML printing of grammars do not work yet.
+
+
+
+<!-- NEW -->
+
+<h2>How to use GF 1.* files</h2>
+
+Backward compatibility with respect to old GF grammars has been
+a central goal. All GF grammars, from version 0.9, should work in
+the old way in GF2. The main exceptions are some features that 
+are rarely used. 
+<ul>
+<li> The <tt>package</tt> system introduced in GF 1.2, cannot be
+     interpreted in the module system of GF 2.0, since packages are in
+     mutual scope with the top level.
+<li> <tt>tokenizer</tt> pragmas cannot be parsed any more. In GF
+     1.2, they are already replaced by <tt>lexer</tt> flags.
+<li> <tt>var</tt> pragmas cannot be parsed any more.
+</ul>
+
+<p>
+
+Very old GF grammars (from versions before 0.9), with the completely
+different notation, do not work. They should be first converted to 
+GF1 by using GF version 1.2.
+
+
+<!-- NEW -->
+
+
+The import command <tt>i</tt> can be given the option <tt>-old</tt>. E.g.
+<pre>
+  i -old tut1.Eng.gf
+</pre>
+But this is no longer necessary: GF2 detects automatically if a grammar
+is in the GF1 format.
+
+<p>
+
+Importing a set of GF1 files generates, internally, three modules:
+<pre>
+  abstract tut1 = ...
+  resource ResEng = ...
+  concrete Eng of tut1 = open ResEng in ...
+</pre>
+(The names are different if the file name has fewer parts.)
+
+
+<p>
+
+The option <tt>-o</tt> causes GF2 to write these modules into files.
+
+
+<!-- NEW -->
+
+The flags <tt>-abs</tt>, <tt>-cnc</tt>, and <tt>-res</tt> can be used
+to give custom names to the modules. In particular, it is good to use
+the <tt>-abs</tt> flag to guarantee that the abstract syntax module
+has the same name for all grammars in a multilingual environment:
+<pre>
+  i -old -abs=Numerals hungarian.gf
+  i -old -abs=Numerals tamil.gf
+  i -old -abs=Numerals sanskrit.gf
+</pre>
+
+<p>
+
+The same flags as in the import command can be used when invoking
+GF2 from the system shell. Many grammars can be imported on the same command
+line, e.g.
+<pre>
+  % gf2 -old -abs=Tutorial tut1.Eng.gf tut1.Fin.gf tut1.Fra.gf
+</pre>
+
+<p>
+
+To write a GF2 grammar back to GF1 (as one big file), use the command
+<pre>
+  > pg -old
+</pre>
+
+
+
+<!-- NEW -->
+
+
+
+GF2 has more reserved words than GF 1.2. When old files are read, a preprocessor
+replaces every identifier that has the shape of a new reserved word
+with a variant where the last letter is replaced by <tt>Z</tt>, e.g.
+<tt>instance</tt> is replaced by <tt>instancZ</tt>. This method is of course
+unsafe and should be replaced by something better.
+
+
+
+
+<!-- NEW -->
+
+<h2>Abstract, concrete, and resource modules</h2>
+
+Judgement forms are sorted as follows:
+<ul>
+<li> abstract: 
+  <tt>cat</tt>, <tt>fun</tt>, <tt>def</tt>, <tt>data</tt>, <tt>flags</tt>
+<li> concrete: 
+  <tt>lincat</tt>, <tt>lin</tt>, <tt>printname</tt>, <tt>flags</tt>
+<li> resource: 
+  <tt>param</tt>, <tt>oper</tt>, <tt>flags</tt>
+<li> 
+</ul>
+
+
+<!-- NEW -->
+
+Example:
+<pre>
+  abstract Sums = {
+    cat 
+      Exp ;
+    fun 
+      One : Exp ;
+      plus : Exp -> Exp -> Exp ;
+  }
+
+  concrete EnglishSums of Sums = open ResEng in {
+    lincat 
+      Exp = {s : Str ; n : Number} ;
+    lin
+      One = expSg "one" ;
+      plus x y = expSg ("the" ++ "sum" ++ "of" ++ x.s ++ "and" ++ y.s) ;
+  }
+
+  resource ResEng = {
+    param 
+      Number = Sg | Pl ;
+    oper 
+      expSg : Str -> {s : Str ; n : Number} = \s -> {s = s ; n = Sg} ;
+  }
+</pre>
+
+
+
+<!-- NEW -->
+
+<h2>Opening and extending modules</h2>
+
+A <tt>concrete</tt> or <tt>resource</tt> can <b>open</b> a
+<tt>resource</tt>. This means that
+<ul>
+<li> the names defined in <tt>resource</tt> can be used ("become visible")
+<li> but: these names are not included in ("exported from") the opening module
+</ul>
+A module of any type can moreover <b>extend</b> a module of the same type.
+This means that
+<ul>
+<li> the names defined in the extended module can be used ("become visible")
+<li> and also: these names are included in ("exported from") the extending module
+</ul>
+Examples of extension:
+<pre>
+  abstract Products = Sums ** {
+    fun times : Exp -> Exp -> Exp ;
+  }
+  -- names exported: Exp, One, plus, times
+
+  concrete English of Products = EnglishSums ** open ResEng in {
+    lin times x y = expSg ("the" ++ "product" ++ "of" ++ x.s ++ "and" ++ y.s) ;
+  }
+</pre>
+Another important difference:
+<li> extension is single
+<li> opening can be multiple: <tt>open Foo, Bar, Baz in {...}</tt>
+
+<!-- NEW -->
+
+Moreover:
+<li> opening can be <b>qualified</b>
+<p>
+Example of qualified opening:
+<pre>
+  concrete NumberSystems of Systems = open (Bin = Binary), (Dec = Decimal) in {
+    lin 
+      BZero = Bin.Zero ;
+      DZero = Dec.Zero
+  }
+</pre>
+  
+
+<!-- NEW -->
+
+<h2>Compiling modules</h2>
+
+Separate compilation assumes there is <b>one module per file</b>.
+
+<p>
+
+The <b>module header</b> is the beginning of the module code up to the
+first left bracket (<tt>{</tt>). The header gives
+<ul>
+<li> the module type: <tt>abstract</tt>, <tt>concrete</tt> (<tt>of</tt> <i>A</i>), 
+  or <tt>resource</tt>
+<li> the name of the module (next to the module type keyword)
+<li> the name of extended module (between <tt>=</tt> and <tt>**</tt>)
+<li> the names of opened modules
+</ul>
+
+<!-- NEW -->
+
+
+<b>filename</b> = <b>modulename</b> <tt>.</tt> <b>extension</b>
+
+<p>
+
+File name extensions:
+<ul>
+<li> <tt>gf</tt>: GF source file (uses GF syntax, is type checked and compiled)
+<li> <tt>gfc</tt>: canonical GF file (uses GFC syntax, is simply read
+in instead of compiled; produced from all kinds of modules)
+<li> <tt>gfr</tt>: GF resource file (uses GF syntax, is only read in; produced from
+<tt>resource</tt> modules)
+<li> <tt>gfcm</tt>: canonical multilingual GF file 
+(uses GFC syntax, is only read in; produced 
+from a set of <tt>abstract</tt> and <tt>concrete</tt> modules)
+</ul>
+Only <tt>gf</tt> files should ever be written/edited manually!
+
+
+
+<!-- NEW -->
+
+
+What the make facility does when compiling <tt>Foo.gf</tt>
+<ol>
+<li> read the module header of <tt>Foo.gf</tt>, and recursively all headers from
+the modules it <b>depends</b> on (i.e. extends or opens) 
+<li> build a dependency graph of these modules, and do topological sorting
+<li> starting from the first module in topological order,
+compare the modification times of each <tt>gf</tt> and <tt>gfc</tt> file:
+<ul>
+<li> if <tt>gf</tt> is later, compile the module and all modules depending on it
+<li> if <tt>gfc</tt> is later, just read in the module
+</ul>
+</ol>
+Inside the GF shell, also time stamps of modules read into memory are
+taken into account. Thus a module need not be read from a file if the
+module is in the memory and the file has not been modified.
+
+
+<!-- NEW -->
+
+If the compilation of a grammar fails at some module, the state of the
+GF shell contains all modules read up to that point. This makes it
+faster to compile the faulty module again after fixing it.
+
+<p>
+
+Use the command <tt>po</tt> = <tt>print_options</tt> to see what
+modules are in the state.
+
+<p>
+
+To force compilation:
+<ul>
+<li> The flag <i>-src</i> in the import command forces compilation from
+     source even if more recent object files exist. This is useful
+     when testing new versions of GF.
+<li> The flag <i>-retain</i> in the import command forces reading in
+     <tt>gfr</tt> files in addition to <tt>gfc</tt> files. This is useful
+     when testing operations with the <tt>cc</tt> command.
+</ul>
+
+<!-- NEW -->
+
+<h2>Module search paths</h2>
+
+Modules can reside in different directories. Use the <tt>path</tt>
+flag to extend the directory search path. For instance,
+<pre>
+  -path=.:../resource/russian:../prelude
+</pre>
+enables files to be found in three different directories.
+By default, only the current directory is included.
+If a <tt>path</tt> flag is given, the current directory
+<tt>.</tt> must be explicitly included if it is wanted.
+
+<p>
+
+The <tt>path</tt> flag can be set in any of the following
+places:
+<ul>
+<li> when invoking GF: <tt>gf -path=xxx</tt>
+<li> when importing a module: <tt>i -path=xxx Foo.gf</tt>
+<li> as a pragma in a topmost file: <tt>--# -path=xxx</tt>
+</ul>
+A flag set on a command line overrides ones set in files.
+
+<p>
+
+The value of the environment variable <tt>GF_LIB_PATH</tt> is
+appended to the user-given path.
+
+
+<!-- NEW -->
+
+<h2>To do</h2>
+
+Testing
+
+<p>
+
+Documentation
+
+<p> 
+
+Packaging
+
+
+
+<!-- NEW -->
+
+<h2>Nasty details</h2>
+
+
+<li> Readline in Solaris
+
+<li> Proper treatment of file search paths
+
+<li> Unicode fonts in GUIs
+
+<li> directionality of Semitic alphabets
+
+
+
+</body>
+</html>
--- a/src/GF/API.hs
+++ b/src/GF/API.hs
@@ -148,8 +148,9 @@ string2srcTerm gr m s = do
 randomTreesIO :: Options -> GFGrammar -> Int -> IO [Tree]
 randomTreesIO opts gr n = do
  gen <- myStdGen mx
-  t   <- err (\s -> putStrLnFlush s >> return []) (return . singleton) $ 
-                                                       mkRandomTree gen mx g catfun
+  t   <- err (\s -> putS s >> return []) 
+             (return . singleton) $ 
+                mkRandomTree gen mx g catfun
  ts  <- if n==1 then return [] else randomTreesIO opts gr (n-1)
  return $ t ++ ts
 where
@@ -158,6 +159,8 @@ randomTreesIO opts gr n = do
     _ -> Left $ firstAbsCat opts gr
   g   = grammar gr
   mx  = optIntOrN opts flagDepth 41
+   putS s = if oElem beSilent opts then return () else putStrLnFlush s
+

 generateTrees :: Options -> GFGrammar -> Maybe Tree -> [Tree]
 generateTrees opts gr mt =
--- a/src/GF/Compile/Compile.hs
+++ b/src/GF/Compile/Compile.hs
@@ -35,6 +35,10 @@ import Arch

 import Monad

+-- environment variable for grammar search path
+
+gfGrammarPathVar = "GF_LIB_PATH"
+
 -- in batch mode: write code in a file

 batchCompile f = liftM fst $ compileModule defOpts emptyShellState f
@@ -86,9 +90,10 @@ compileModule opts1 st0 file = do
  let opts = addOptions opts1 opts0 
  let ps0  = pathListOpts opts
  let fpath = justInitPath file
-  let ps = if useFileOpt 
-             then (map (prefixPathName fpath) ps0)
-             else ps0
+  let ps1 = if useFileOpt 
+              then (map (prefixPathName fpath) ps0)
+              else ps0
+  ps <- ioeIO $ extendPathEnv gfGrammarPathVar ps1
  let ioeIOIf = if oElem beSilent opts then (const (return ())) else ioeIO
  ioeIOIf $ putStrLn $ "module search path:" +++ show ps ----
  let putp = putPointE opts
--- a/src/GF/Infra/UseIO.hs
+++ b/src/GF/Infra/UseIO.hs
@@ -81,6 +81,13 @@ doesFileExistPath paths file = do
  mpfile <- ioeIO $ getFilePath paths file
  return $ maybe False (const True) mpfile

+-- path in environment variable has lower priority
+extendPathEnv :: String -> [FilePath] -> IO [FilePath]
+extendPathEnv var ps = do
+  s <- catch (getEnv var) (const (return ""))
+  let fs = pFilePaths s
+  return $ ps ++ fs
+
 pFilePaths :: String -> [FilePath]
 pFilePaths s = case span (/=':') s of
  (f,_:cs) -> f : pFilePaths cs
--- a/src/GF/Shell/TeachYourself.hs
+++ b/src/GF/Shell/TeachYourself.hs
@@ -24,7 +24,7 @@ teachTranslation opts ig og = do
 transTrainList :: 
  Options -> GFGrammar -> GFGrammar -> Integer -> IO [(String,[String])]
 transTrainList opts ig og number = do
-  ts <- randomTreesIO opts ig (fromInteger number)
+  ts <- randomTreesIO (addOption beSilent opts) ig (fromInteger number)
  return $ map mkOne $ ts
 where
   cat = firstCatOpts opts ig
@@ -39,7 +39,7 @@ teachMorpho opts ig = useIOE () $ do

 morphoTrainList :: Options -> GFGrammar -> Integer -> IOE [(String,[String])]
 morphoTrainList opts ig number = do
-  ts   <- ioeIO $ randomTreesIO opts ig (fromInteger number)
+  ts   <- ioeIO $ randomTreesIO (addOption beSilent opts) ig (fromInteger number)
  gen  <- ioeIO $ myStdGen (fromInteger number)
  mkOnes gen ts
 where
@@ -49,9 +49,9 @@ morphoTrainList opts ig number = do
     let (i,gen') = randomR (0, length pss - 1) gen
     (ps,ss) <- ioeErr $ pss !? i
     (_,ss0) <- ioeErr $ pss !? 0
-     let bas = concat $ take 1 ss0
+     let bas = unwords ss0 --- concat $ take 1 ss0
     more <- mkOnes gen' ts
-     return $ (bas +++ ":" +++ unwords (map prt_ ps), return (concat ss)) : more
+     return $ (bas +++ ":" +++ unwords (map prt_ ps), return (unwords ss)) : more
   mkOnes gen [] = return []
 
   gr = grammar ig