From ee26cf955b4fdceb6d6ec427bb3d138f7e869a55 Mon Sep 17 00:00:00 2001 From: aarne Date: Sat, 4 Mar 2006 14:58:11 +0000 Subject: [PATCH] treebank creating script; duplicated consonants in LexiconEng --- doc/gf-history.html | 23 ++++++++++++++++++++++ lib/resource-1.0/Makefile | 3 +++ lib/resource-1.0/english/LexiconEng.gf | 14 ++++++------- lib/resource-1.0/minimal/MinStructural.gf | 1 + lib/resource-1.0/minimal/MinStructuralI.gf | 1 + lib/resource-1.0/mkTreebank.gfs | 10 ++++++++++ src/GF/Shell/HelpFile.hs | 16 +++++++++++---- src/GF/Shell/PShell.hs | 2 +- src/HelpFile | 16 +++++++++++---- 9 files changed, 70 insertions(+), 16 deletions(-) create mode 100644 lib/resource-1.0/mkTreebank.gfs diff --git a/doc/gf-history.html b/doc/gf-history.html index 9450c8d99..0c913635a 100644 --- a/doc/gf-history.html +++ b/doc/gf-history.html @@ -12,6 +12,29 @@ Changes in functionality since May 17, 2005, release of GF Version 2.2 +

+ +4/3 (AR) Added command use_treebank = ut for lookup in a treebank. +This command can be used as a fast substitute for parsing, but also as a +way to browse treebanks. +

+  ut "He adds this to that" | l -multi   -- use treebank lookup as parser in translation
+  ut -assocs | grep "ComplV2"            -- show all associations with ComplV2
+
+ +

+ +3/3 (AR) Added option -treebank to the i command. This adds treebanks to +the shell state. The possible file formats are +

    +
  1. XML file with a multilingual treebank, produced by tb -xml +
  2. tab-organized text file with a unilingual treebank, produced by ut -assocs +
+Notice that the treebanks in the shell state are unilingual, and have strings as keys. +Multilingual treebanks have trees as keys. In case 1, one unilingual treebank per +language is built in the shell state. + +

1/3 (AR) Added option -trees to the command tree_bank = tb. diff --git a/lib/resource-1.0/Makefile b/lib/resource-1.0/Makefile index 91e08f4d6..911744438 100644 --- a/lib/resource-1.0/Makefile +++ b/lib/resource-1.0/Makefile @@ -23,6 +23,9 @@ multimodal: compiled: cd .. ; tar cvfz compiled.tgz alltenses/ mathematical/ multimodal/ present/ +treebank: + gf " | wf min.xml +gt -depth=4 -cat=QS | tb -xml | grep -v "treebank>" | af min.xml +gt -depth=3 (UttImpSg ? ?) | tb -xml | grep -v "treebank>" | af min.xml +gt -depth=4 -cat=NP | tb -xml | grep -v " aUnit $ CComputeConcrete $ unwords s "so" : s -> aUnit $ CShowOpers $ unwords s "tb" : [] -> aUnit CTreeBank - "lt" : s -> aString CLookupTreebank s + "ut" : s -> aString CLookupTreebank s "tq" : i:o:[] -> aUnit (CTranslationQuiz (language i) (language o)) "tl":i:o:[] -> aUnit (CTranslationList (language i) (language o)) diff --git a/src/HelpFile b/src/HelpFile index aff813043..bea9a7b9a 100644 --- a/src/HelpFile +++ b/src/HelpFile @@ -216,11 +216,19 @@ tb, tree_bank: tb rf tb.xml | tb -c -- compare-test treebank from file rf old.xml | tb -trees | tb -xml -- create new treebank from old -lt, lookup_treebank: lt String +ut, use_treebank: ut String Lookup a string in a treebank and return the resulting trees. - Use 'tb' to create a treebank and 'i -treebank' to read it in memory. - flag: - -treebank use this treebank (instead of the latest introduced one) TODO + Use 'tb' to create a treebank and 'i -treebank' to read one from + a file. 
+ options: + -assocs show all string-trees associations in the treebank + -strings show all strings in the treebank + -raw return result as string, without typechecking it + flags: + -treebank use this treebank (instead of the latest introduced one) + examples: + ut "He adds this to that" | l -multi -- use treebank lookup as parser in translation + ut -assocs | grep "ComplV2" -- show all associations with ComplV2 tt, test_tokenizer: tt String Show the token list sent to the parser when String is parsed.