forked from GitHub/gf-core
treebank creating script; duplicated consonants in LexiconEng
This commit is contained in:
@@ -12,6 +12,29 @@ Changes in functionality since May 17, 2005, release of GF Version 2.2
|
||||
|
||||
</center>
|
||||
|
||||
<p>
|
||||
|
||||
4/3 (AR) Added command <tt>use_treebank = ut</tt> for lookup in a treebank.
|
||||
This command can be used as a fast substitute for parsing, but also as a
|
||||
way to browse treebanks.
|
||||
<pre>
|
||||
ut "He adds this to that" | l -multi -- use treebank lookup as parser in translation
|
||||
ut -assocs | grep "ComplV2" -- show all associations with ComplV2
|
||||
</pre>
|
||||
|
||||
<p>
|
||||
|
||||
3/3 (AR) Added option <tt>-treebank</tt> to the <tt>i</tt> command. This adds treebanks to
|
||||
the shell state. The possible file formats are
|
||||
<ol>
|
||||
<li> XML file with a multilingual treebank, produced by <tt>tb -xml</tt>
|
||||
<li> tab-organized text file with a unilingual treebank, produced by <tt>ut -assocs</tt>
|
||||
</ol>
|
||||
Note that the treebanks in the shell state are unilingual and have strings as keys.
|
||||
Multilingual treebanks have trees as keys. In case 1 (the XML file), one unilingual treebank per
|
||||
language is built in the shell state.
|
||||
|
||||
|
||||
<p>
|
||||
|
||||
1/3 (AR) Added option <tt>-trees</tt> to the command <tt>tree_bank = tb</tt>.
|
||||
|
||||
@@ -23,6 +23,9 @@ multimodal:
|
||||
compiled:
|
||||
cd .. ; tar cvfz compiled.tgz alltenses/ mathematical/ multimodal/ present/
|
||||
|
||||
treebank:
|
||||
gf <mkTreebank.gfs
|
||||
|
||||
stat:
|
||||
wc */*.gfc
|
||||
|
||||
|
||||
@@ -13,13 +13,13 @@ lin
|
||||
art_N = regN "art" ;
|
||||
ask_V2Q = mkV2Q (regV "ask") [] ;
|
||||
baby_N = regN "baby" ;
|
||||
bad_A = regADeg "bad" ;
|
||||
bad_A = mkADeg "bad" "badly" "worse" "worst" ;
|
||||
bank_N = regN "bank" ;
|
||||
beautiful_A = regADeg "beautiful" ;
|
||||
become_VA = mkVA (irregV "become" "became" "become") ;
|
||||
beer_N = regN "beer" ;
|
||||
beg_V2V = mkV2V (regDuplV "beg") [] "to" ;
|
||||
big_A = regADeg "big" ;
|
||||
big_A = duplADeg "big" ;
|
||||
bike_N = regN "bike" ;
|
||||
bird_N = regN "bird" ;
|
||||
black_A = regADeg "black" ;
|
||||
@@ -74,7 +74,7 @@ lin
|
||||
find_V2 = dirV2 (irregV "find" "found" "found") ;
|
||||
fish_N = mk2N "fish" "fish" ;
|
||||
floor_N = regN "floor" ;
|
||||
forget_V2 = dirV2 (irregV "forget" "forgot" "forgotten") ;
|
||||
forget_V2 = dirV2 (irregDuplV "forget" "forgot" "forgotten") ;
|
||||
fridge_N = regN "fridge" ;
|
||||
friend_N = regN "friend" ;
|
||||
fruit_N = regN "fruit" ;
|
||||
@@ -94,7 +94,7 @@ lin
|
||||
hill_N = regN "hill" ;
|
||||
hope_VS = mkVS (regV "hope") ;
|
||||
horse_N = regN "horse" ;
|
||||
hot_A = regADeg "hot" ;
|
||||
hot_A = duplADeg "hot" ;
|
||||
house_N = regN "house" ;
|
||||
important_A = compoundADeg (regA "important") ;
|
||||
industry_N = regN "industry" ;
|
||||
@@ -142,7 +142,7 @@ lin
|
||||
radio_N = regN "radio" ;
|
||||
rain_V0 = mkV0 (regV "rain") ;
|
||||
read_V2 = dirV2 (irregV "read" "read" "read") ;
|
||||
red_A = regADeg "red" ;
|
||||
red_A = duplADeg "red" ;
|
||||
religion_N = regN "religion" ;
|
||||
restaurant_N = regN "restaurant" ;
|
||||
river_N = regN "river" ;
|
||||
@@ -186,7 +186,7 @@ lin
|
||||
teach_V2 = dirV2 (irregV "teach" "taught" "taught") ;
|
||||
television_N = regN "television" ;
|
||||
thick_A = regADeg "thick" ;
|
||||
thin_A = regADeg "thin" ;
|
||||
thin_A = duplADeg "thin" ;
|
||||
train_N = regN "train" ;
|
||||
travel_V = (regDuplV "travel") ;
|
||||
tree_N = regN "tree" ;
|
||||
@@ -204,7 +204,7 @@ lin
|
||||
white_A = regADeg "white" ;
|
||||
window_N = regN "window" ;
|
||||
wine_N = regN "wine" ;
|
||||
win_V2 = dirV2 (irregV "win" "won" "won") ;
|
||||
win_V2 = dirV2 (irregDuplV "win" "won" "won") ;
|
||||
woman_N = mk2N "woman" "women" ;
|
||||
wonder_VQ = mkVQ (regV "wonder") ;
|
||||
wood_N = regN "wood" ;
|
||||
|
||||
@@ -18,6 +18,7 @@ abstract MinStructural = Cat ** {
|
||||
if_Subj : Subj ;
|
||||
in_Prep : Prep ;
|
||||
that_NP : NP ;
|
||||
this_NP : NP ;
|
||||
we_Pron : Pron ;
|
||||
whichPl_IDet : IDet ;
|
||||
whichSg_IDet : IDet ;
|
||||
|
||||
@@ -13,6 +13,7 @@ incomplete concrete MinStructuralI of MinStructural = open Structural in {
|
||||
if_Subj = if_Subj ;
|
||||
in_Prep = in_Prep ;
|
||||
that_NP = that_NP ;
|
||||
this_NP = this_NP ;
|
||||
we_Pron = we_Pron ;
|
||||
whichPl_IDet = whichPl_IDet ;
|
||||
whichSg_IDet = whichSg_IDet ;
|
||||
|
||||
10
lib/resource-1.0/mkTreebank.gfs
Normal file
10
lib/resource-1.0/mkTreebank.gfs
Normal file
@@ -0,0 +1,10 @@
|
||||
-- to create a treebank with 3066 trees, for all languages
|
||||
|
||||
i minimal/MinimalEng.gf
|
||||
gt -depth=4 -cat=S | tb -xml | grep -v "/treebank>" | wf min.xml
|
||||
gt -depth=4 -cat=QS | tb -xml | grep -v "treebank>" | af min.xml
|
||||
gt -depth=3 (UttImpSg ? ?) | tb -xml | grep -v "treebank>" | af min.xml
|
||||
gt -depth=4 -cat=NP | tb -xml | grep -v "<treebank" | af min.xml
|
||||
e
|
||||
i -nocf langs.gfcm
|
||||
rf min.xml | tb -trees | tb -xml | wf langs.xml
|
||||
@@ -245,11 +245,19 @@ txtHelpFile =
|
||||
"\n rf tb.xml | tb -c -- compare-test treebank from file" ++
|
||||
"\n rf old.xml | tb -trees | tb -xml -- create new treebank from old" ++
|
||||
"\n" ++
|
||||
"\nlt, lookup_treebank: lt String" ++
|
||||
"\nut, use_treebank: ut String" ++
|
||||
"\n Lookup a string in a treebank and return the resulting trees." ++
|
||||
"\n Use 'tb' to create a treebank and 'i -treebank' to read it in memory." ++
|
||||
"\n flag:" ++
|
||||
"\n -treebank use this treebank (instead of the latest introduced one) TODO" ++
|
||||
"\n Use 'tb' to create a treebank and 'i -treebank' to read one from" ++
|
||||
"\n a file." ++
|
||||
"\n options:" ++
|
||||
"\n -assocs show all string-trees associations in the treebank" ++
|
||||
"\n -strings show all strings in the treebank" ++
|
||||
"\n -raw return result as string, without typechecking it" ++
|
||||
"\n flags:" ++
|
||||
"\n -treebank use this treebank (instead of the latest introduced one)" ++
|
||||
"\n examples:" ++
|
||||
"\n ut \"He adds this to that\" | l -multi -- use treebank lookup as parser in translation" ++
|
||||
"\n ut -assocs | grep \"ComplV2\" -- show all associations with ComplV2" ++
|
||||
"\n" ++
|
||||
"\ntt, test_tokenizer: tt String" ++
|
||||
"\n Show the token list sent to the parser when String is parsed." ++
|
||||
|
||||
@@ -114,7 +114,7 @@ pCommand ws = case ws of
|
||||
"cc" : s -> aUnit $ CComputeConcrete $ unwords s
|
||||
"so" : s -> aUnit $ CShowOpers $ unwords s
|
||||
"tb" : [] -> aUnit CTreeBank
|
||||
"lt" : s -> aString CLookupTreebank s
|
||||
"ut" : s -> aString CLookupTreebank s
|
||||
|
||||
"tq" : i:o:[] -> aUnit (CTranslationQuiz (language i) (language o))
|
||||
"tl":i:o:[] -> aUnit (CTranslationList (language i) (language o))
|
||||
|
||||
16
src/HelpFile
16
src/HelpFile
@@ -216,11 +216,19 @@ tb, tree_bank: tb
|
||||
rf tb.xml | tb -c -- compare-test treebank from file
|
||||
rf old.xml | tb -trees | tb -xml -- create new treebank from old
|
||||
|
||||
lt, lookup_treebank: lt String
|
||||
ut, use_treebank: ut String
|
||||
Look up a string in a treebank and return the resulting trees.
|
||||
Use 'tb' to create a treebank and 'i -treebank' to read it in memory.
|
||||
flag:
|
||||
-treebank use this treebank (instead of the latest introduced one) TODO
|
||||
Use 'tb' to create a treebank and 'i -treebank' to read one from
|
||||
a file.
|
||||
options:
|
||||
-assocs show all string-tree associations in the treebank
|
||||
-strings show all strings in the treebank
|
||||
-raw return result as string, without typechecking it
|
||||
flags:
|
||||
-treebank use this treebank (instead of the latest introduced one)
|
||||
examples:
|
||||
ut "He adds this to that" | l -multi -- use treebank lookup as parser in translation
|
||||
ut -assocs | grep "ComplV2" -- show all associations with ComplV2
|
||||
|
||||
tt, test_tokenizer: tt String
|
||||
Show the token list sent to the parser when String is parsed.
|
||||
|
||||
Reference in New Issue
Block a user