treebank creating script; duplicated consonants in LexiconEng

This commit is contained in:
aarne
2006-03-04 14:58:11 +00:00
parent 277a333a02
commit ee26cf955b
9 changed files with 70 additions and 16 deletions

View File

@@ -12,6 +12,29 @@ Changes in functionality since May 17, 2005, release of GF Version 2.2
</center>
<p>
4/3 (AR) Added command <tt>use_treebank = ut</tt> for lookup in a treebank.
This command can be used as a fast substitute for parsing, but also as a
way to browse treebanks.
<pre>
ut "He adds this to that" | l -multi -- use treebank lookup as parser in translation
ut -assocs | grep "ComplV2" -- show all associations with ComplV2
</pre>
<p>
3/3 (AR) Added option <tt>-treebank</tt> to the <tt>i</tt> command. This adds treebanks to
the shell state. The possible file formats are:
<ol>
<li> XML file with a multilingual treebank, produced by <tt>tb -xml</tt>
<li> tab-organized text file with a unilingual treebank, produced by <tt>ut -assocs</tt>
</ol>
Note that the treebanks in the shell state are unilingual and have strings as keys,
whereas multilingual treebanks have trees as keys. In case 1, one unilingual treebank per
language is built in the shell state.
<p>
1/3 (AR) Added option <tt>-trees</tt> to the command <tt>tree_bank = tb</tt>.

View File

@@ -23,6 +23,9 @@ multimodal:
compiled:
cd .. ; tar cvfz compiled.tgz alltenses/ mathematical/ multimodal/ present/
treebank:
gf <mkTreebank.gfs
stat:
wc */*.gfc

View File

@@ -13,13 +13,13 @@ lin
art_N = regN "art" ;
ask_V2Q = mkV2Q (regV "ask") [] ;
baby_N = regN "baby" ;
bad_A = regADeg "bad" ;
-- Irregular comparison. mkADeg takes positive, comparative, superlative, adverb
-- (cf. the ParadigmsEng signature "good, better, best, well"), so the adverb goes last.
bad_A = mkADeg "bad" "worse" "worst" "badly" ;
bank_N = regN "bank" ;
beautiful_A = regADeg "beautiful" ;
become_VA = mkVA (irregV "become" "became" "become") ;
beer_N = regN "beer" ;
beg_V2V = mkV2V (regDuplV "beg") [] "to" ;
big_A = regADeg "big" ;
big_A = duplADeg "big" ;
bike_N = regN "bike" ;
bird_N = regN "bird" ;
black_A = regADeg "black" ;
@@ -74,7 +74,7 @@ lin
find_V2 = dirV2 (irregV "find" "found" "found") ;
fish_N = mk2N "fish" "fish" ;
floor_N = regN "floor" ;
forget_V2 = dirV2 (irregV "forget" "forgot" "forgotten") ;
forget_V2 = dirV2 (irregDuplV "forget" "forgot" "forgotten") ;
fridge_N = regN "fridge" ;
friend_N = regN "friend" ;
fruit_N = regN "fruit" ;
@@ -94,7 +94,7 @@ lin
hill_N = regN "hill" ;
hope_VS = mkVS (regV "hope") ;
horse_N = regN "horse" ;
hot_A = regADeg "hot" ;
hot_A = duplADeg "hot" ;
house_N = regN "house" ;
important_A = compoundADeg (regA "important") ;
industry_N = regN "industry" ;
@@ -142,7 +142,7 @@ lin
radio_N = regN "radio" ;
rain_V0 = mkV0 (regV "rain") ;
read_V2 = dirV2 (irregV "read" "read" "read") ;
red_A = regADeg "red" ;
red_A = duplADeg "red" ;
religion_N = regN "religion" ;
restaurant_N = regN "restaurant" ;
river_N = regN "river" ;
@@ -186,7 +186,7 @@ lin
teach_V2 = dirV2 (irregV "teach" "taught" "taught") ;
television_N = regN "television" ;
thick_A = regADeg "thick" ;
thin_A = regADeg "thin" ;
thin_A = duplADeg "thin" ;
train_N = regN "train" ;
travel_V = (regDuplV "travel") ;
tree_N = regN "tree" ;
@@ -204,7 +204,7 @@ lin
white_A = regADeg "white" ;
window_N = regN "window" ;
wine_N = regN "wine" ;
win_V2 = dirV2 (irregV "win" "won" "won") ;
win_V2 = dirV2 (irregDuplV "win" "won" "won") ;
woman_N = mk2N "woman" "women" ;
wonder_VQ = mkVQ (regV "wonder") ;
wood_N = regN "wood" ;

View File

@@ -18,6 +18,7 @@ abstract MinStructural = Cat ** {
if_Subj : Subj ;
in_Prep : Prep ;
that_NP : NP ;
this_NP : NP ;
we_Pron : Pron ;
whichPl_IDet : IDet ;
whichSg_IDet : IDet ;

View File

@@ -13,6 +13,7 @@ incomplete concrete MinStructuralI of MinStructural = open Structural in {
if_Subj = if_Subj ;
in_Prep = in_Prep ;
that_NP = that_NP ;
this_NP = this_NP ;
we_Pron = we_Pron ;
whichPl_IDet = whichPl_IDet ;
whichSg_IDet = whichSg_IDet ;

View File

@@ -0,0 +1,10 @@
-- to create a treebank with 3066 trees, for all languages
-- Intended to be fed to the GF shell on standard input: gf <mkTreebank.gfs
-- Step 1: import the minimal English grammar into the shell.
i minimal/MinimalEng.gf
-- Step 2: build min.xml from four batches of trees (categories S, QS, the
-- UttImpSg pattern, and NP), each converted to XML with 'tb -xml'.
-- The first batch goes through 'wf' and the later ones through 'af'
-- (presumably write-file and append-file -- TODO confirm).
-- The 'grep -v' filters drop lines containing treebank tags: the first batch
-- drops lines with "/treebank>", the middle batches drop any line with
-- "treebank>", and the last batch drops lines with "<treebank". Presumably this
-- leaves a single opening and a single closing treebank tag in min.xml --
-- verify against the actual 'tb -xml' output.
gt -depth=4 -cat=S | tb -xml | grep -v "/treebank>" | wf min.xml
gt -depth=4 -cat=QS | tb -xml | grep -v "treebank>" | af min.xml
gt -depth=3 (UttImpSg ? ?) | tb -xml | grep -v "treebank>" | af min.xml
gt -depth=4 -cat=NP | tb -xml | grep -v "<treebank" | af min.xml
-- Step 3: 'e' presumably empties the shell state before the multilingual
-- grammar is loaded -- TODO confirm.
e
i -nocf langs.gfcm
-- Step 4: create a new treebank (langs.xml) from the old one (min.xml): read
-- the file, extract its trees with 'tb -trees', and rebuild the XML treebank
-- under the grammars now loaded.
rf min.xml | tb -trees | tb -xml | wf langs.xml

View File

@@ -245,11 +245,19 @@ txtHelpFile =
"\n rf tb.xml | tb -c -- compare-test treebank from file" ++
"\n rf old.xml | tb -trees | tb -xml -- create new treebank from old" ++
"\n" ++
"\nlt, lookup_treebank: lt String" ++
"\nut, use_treebank: ut String" ++
"\n Lookup a string in a treebank and return the resulting trees." ++
"\n Use 'tb' to create a treebank and 'i -treebank' to read it in memory." ++
"\n flag:" ++
"\n -treebank use this treebank (instead of the latest introduced one) TODO" ++
"\n Use 'tb' to create a treebank and 'i -treebank' to read one from" ++
"\n a file." ++
"\n options:" ++
"\n -assocs show all string-trees associations in the treebank" ++
"\n -strings show all strings in the treebank" ++
"\n -raw return result as string, without typechecking it" ++
"\n flags:" ++
"\n -treebank use this treebank (instead of the latest introduced one)" ++
"\n examples:" ++
"\n ut \"He adds this to that\" | l -multi -- use treebank lookup as parser in translation" ++
"\n ut -assocs | grep \"ComplV2\" -- show all associations with ComplV2" ++
"\n" ++
"\ntt, test_tokenizer: tt String" ++
"\n Show the token list sent to the parser when String is parsed." ++

View File

@@ -114,7 +114,7 @@ pCommand ws = case ws of
"cc" : s -> aUnit $ CComputeConcrete $ unwords s
"so" : s -> aUnit $ CShowOpers $ unwords s
"tb" : [] -> aUnit CTreeBank
"lt" : s -> aString CLookupTreebank s
"ut" : s -> aString CLookupTreebank s
"tq" : i:o:[] -> aUnit (CTranslationQuiz (language i) (language o))
"tl":i:o:[] -> aUnit (CTranslationList (language i) (language o))

View File

@@ -216,11 +216,19 @@ tb, tree_bank: tb
rf tb.xml | tb -c -- compare-test treebank from file
rf old.xml | tb -trees | tb -xml -- create new treebank from old
lt, lookup_treebank: lt String
ut, use_treebank: ut String
Lookup a string in a treebank and return the resulting trees.
Use 'tb' to create a treebank and 'i -treebank' to read it in memory.
flag:
-treebank use this treebank (instead of the latest introduced one) TODO
Use 'tb' to create a treebank and 'i -treebank' to read one from
a file.
options:
-assocs show all string-trees associations in the treebank
-strings show all strings in the treebank
-raw return result as string, without typechecking it
flags:
-treebank use this treebank (instead of the latest introduced one)
examples:
ut "He adds this to that" | l -multi -- use treebank lookup as parser in translation
ut -assocs | grep "ComplV2" -- show all associations with ComplV2
tt, test_tokenizer: tt String
Show the token list sent to the parser when String is parsed.