mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-22 19:22:50 -06:00
treebank creating script; duplicated consonants in LexiconEng
This commit is contained in:
@@ -12,6 +12,29 @@ Changes in functionality since May 17, 2005, release of GF Version 2.2
|
|||||||
|
|
||||||
</center>
|
</center>
|
||||||
|
|
||||||
|
<p>
|
||||||
|
|
||||||
|
4/3 (AR) Added command <tt>use_treebank = ut</tt> for lookup in a treebank.
|
||||||
|
This command can be used as a fast substitute for parsing, but also as a
|
||||||
|
way to browse treebanks.
|
||||||
|
<pre>
|
||||||
|
ut "He adds this to that" | l -multi -- use treebank lookup as parser in translation
|
||||||
|
ut -assocs | grep "ComplV2" -- show all associations with ComplV2
|
||||||
|
</pre>
|
||||||
|
|
||||||
|
<p>
|
||||||
|
|
||||||
|
3/3 (AR) Added option <tt>-treebank</tt> to the <tt>i</tt> command. This adds treebanks to
|
||||||
|
the shell state. The possible file formats are
|
||||||
|
<ol>
|
||||||
|
<li> XML file with a multilingual treebank, produced by <tt>tb -xml</tt>
|
||||||
|
<li> tab-organized text file with a unilingual treebank, produced by <tt>ut -assocs</tt>
|
||||||
|
</ol>
|
||||||
|
Note that the treebanks in the shell state are unilingual and have strings as keys.
|
||||||
|
Multilingual treebanks have trees as keys. In case 1, one unilingual treebank per
|
||||||
|
language is built in the shell state.
|
||||||
|
|
||||||
|
|
||||||
<p>
|
<p>
|
||||||
|
|
||||||
1/3 (AR) Added option <tt>-trees</tt> to the command <tt>tree_bank = tb</tt>.
|
1/3 (AR) Added option <tt>-trees</tt> to the command <tt>tree_bank = tb</tt>.
|
||||||
|
|||||||
@@ -23,6 +23,9 @@ multimodal:
|
|||||||
compiled:
|
compiled:
|
||||||
cd .. ; tar cvfz compiled.tgz alltenses/ mathematical/ multimodal/ present/
|
cd .. ; tar cvfz compiled.tgz alltenses/ mathematical/ multimodal/ present/
|
||||||
|
|
||||||
|
treebank:
|
||||||
|
gf <mkTreebank.gfs
|
||||||
|
|
||||||
stat:
|
stat:
|
||||||
wc */*.gfc
|
wc */*.gfc
|
||||||
|
|
||||||
|
|||||||
@@ -13,13 +13,13 @@ lin
|
|||||||
art_N = regN "art" ;
|
art_N = regN "art" ;
|
||||||
ask_V2Q = mkV2Q (regV "ask") [] ;
|
ask_V2Q = mkV2Q (regV "ask") [] ;
|
||||||
baby_N = regN "baby" ;
|
baby_N = regN "baby" ;
|
||||||
bad_A = regADeg "bad" ;
|
bad_A = mkADeg "bad" "worse" "worst" "badly" ; -- argument order: positive, comparative, superlative, adverb
|
||||||
bank_N = regN "bank" ;
|
bank_N = regN "bank" ;
|
||||||
beautiful_A = regADeg "beautiful" ;
|
beautiful_A = regADeg "beautiful" ;
|
||||||
become_VA = mkVA (irregV "become" "became" "become") ;
|
become_VA = mkVA (irregV "become" "became" "become") ;
|
||||||
beer_N = regN "beer" ;
|
beer_N = regN "beer" ;
|
||||||
beg_V2V = mkV2V (regDuplV "beg") [] "to" ;
|
beg_V2V = mkV2V (regDuplV "beg") [] "to" ;
|
||||||
big_A = regADeg "big" ;
|
big_A = duplADeg "big" ;
|
||||||
bike_N = regN "bike" ;
|
bike_N = regN "bike" ;
|
||||||
bird_N = regN "bird" ;
|
bird_N = regN "bird" ;
|
||||||
black_A = regADeg "black" ;
|
black_A = regADeg "black" ;
|
||||||
@@ -74,7 +74,7 @@ lin
|
|||||||
find_V2 = dirV2 (irregV "find" "found" "found") ;
|
find_V2 = dirV2 (irregV "find" "found" "found") ;
|
||||||
fish_N = mk2N "fish" "fish" ;
|
fish_N = mk2N "fish" "fish" ;
|
||||||
floor_N = regN "floor" ;
|
floor_N = regN "floor" ;
|
||||||
forget_V2 = dirV2 (irregV "forget" "forgot" "forgotten") ;
|
forget_V2 = dirV2 (irregDuplV "forget" "forgot" "forgotten") ;
|
||||||
fridge_N = regN "fridge" ;
|
fridge_N = regN "fridge" ;
|
||||||
friend_N = regN "friend" ;
|
friend_N = regN "friend" ;
|
||||||
fruit_N = regN "fruit" ;
|
fruit_N = regN "fruit" ;
|
||||||
@@ -94,7 +94,7 @@ lin
|
|||||||
hill_N = regN "hill" ;
|
hill_N = regN "hill" ;
|
||||||
hope_VS = mkVS (regV "hope") ;
|
hope_VS = mkVS (regV "hope") ;
|
||||||
horse_N = regN "horse" ;
|
horse_N = regN "horse" ;
|
||||||
hot_A = regADeg "hot" ;
|
hot_A = duplADeg "hot" ;
|
||||||
house_N = regN "house" ;
|
house_N = regN "house" ;
|
||||||
important_A = compoundADeg (regA "important") ;
|
important_A = compoundADeg (regA "important") ;
|
||||||
industry_N = regN "industry" ;
|
industry_N = regN "industry" ;
|
||||||
@@ -142,7 +142,7 @@ lin
|
|||||||
radio_N = regN "radio" ;
|
radio_N = regN "radio" ;
|
||||||
rain_V0 = mkV0 (regV "rain") ;
|
rain_V0 = mkV0 (regV "rain") ;
|
||||||
read_V2 = dirV2 (irregV "read" "read" "read") ;
|
read_V2 = dirV2 (irregV "read" "read" "read") ;
|
||||||
red_A = regADeg "red" ;
|
red_A = duplADeg "red" ;
|
||||||
religion_N = regN "religion" ;
|
religion_N = regN "religion" ;
|
||||||
restaurant_N = regN "restaurant" ;
|
restaurant_N = regN "restaurant" ;
|
||||||
river_N = regN "river" ;
|
river_N = regN "river" ;
|
||||||
@@ -186,7 +186,7 @@ lin
|
|||||||
teach_V2 = dirV2 (irregV "teach" "taught" "taught") ;
|
teach_V2 = dirV2 (irregV "teach" "taught" "taught") ;
|
||||||
television_N = regN "television" ;
|
television_N = regN "television" ;
|
||||||
thick_A = regADeg "thick" ;
|
thick_A = regADeg "thick" ;
|
||||||
thin_A = regADeg "thin" ;
|
thin_A = duplADeg "thin" ;
|
||||||
train_N = regN "train" ;
|
train_N = regN "train" ;
|
||||||
travel_V = (regDuplV "travel") ;
|
travel_V = (regDuplV "travel") ;
|
||||||
tree_N = regN "tree" ;
|
tree_N = regN "tree" ;
|
||||||
@@ -204,7 +204,7 @@ lin
|
|||||||
white_A = regADeg "white" ;
|
white_A = regADeg "white" ;
|
||||||
window_N = regN "window" ;
|
window_N = regN "window" ;
|
||||||
wine_N = regN "wine" ;
|
wine_N = regN "wine" ;
|
||||||
win_V2 = dirV2 (irregV "win" "won" "won") ;
|
win_V2 = dirV2 (irregDuplV "win" "won" "won") ;
|
||||||
woman_N = mk2N "woman" "women" ;
|
woman_N = mk2N "woman" "women" ;
|
||||||
wonder_VQ = mkVQ (regV "wonder") ;
|
wonder_VQ = mkVQ (regV "wonder") ;
|
||||||
wood_N = regN "wood" ;
|
wood_N = regN "wood" ;
|
||||||
|
|||||||
@@ -18,6 +18,7 @@ abstract MinStructural = Cat ** {
|
|||||||
if_Subj : Subj ;
|
if_Subj : Subj ;
|
||||||
in_Prep : Prep ;
|
in_Prep : Prep ;
|
||||||
that_NP : NP ;
|
that_NP : NP ;
|
||||||
|
this_NP : NP ;
|
||||||
we_Pron : Pron ;
|
we_Pron : Pron ;
|
||||||
whichPl_IDet : IDet ;
|
whichPl_IDet : IDet ;
|
||||||
whichSg_IDet : IDet ;
|
whichSg_IDet : IDet ;
|
||||||
|
|||||||
@@ -13,6 +13,7 @@ incomplete concrete MinStructuralI of MinStructural = open Structural in {
|
|||||||
if_Subj = if_Subj ;
|
if_Subj = if_Subj ;
|
||||||
in_Prep = in_Prep ;
|
in_Prep = in_Prep ;
|
||||||
that_NP = that_NP ;
|
that_NP = that_NP ;
|
||||||
|
this_NP = this_NP ;
|
||||||
we_Pron = we_Pron ;
|
we_Pron = we_Pron ;
|
||||||
whichPl_IDet = whichPl_IDet ;
|
whichPl_IDet = whichPl_IDet ;
|
||||||
whichSg_IDet = whichSg_IDet ;
|
whichSg_IDet = whichSg_IDet ;
|
||||||
|
|||||||
10
lib/resource-1.0/mkTreebank.gfs
Normal file
10
lib/resource-1.0/mkTreebank.gfs
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
-- to create a treebank with 3066 trees, for all languages
-- usage: feed this script to the GF shell on stdin, i.e.  gf <mkTreebank.gfs
-- The script builds min.xml in four batches and then re-linearizes it as langs.xml.
|
||||||
|
|
||||||
|
-- Step 1: load the minimal English grammar used to generate the trees.
i minimal/MinimalEng.gf
|
||||||
|
-- Step 2: generate S trees and start min.xml. Lines matching "/treebank>" are
-- dropped (presumably only the closing tag, so that later batches can be
-- appended inside the same treebank element -- TODO confirm against tb -xml output).
gt -depth=4 -cat=S | tb -xml | grep -v "/treebank>" | wf min.xml
|
||||||
|
-- Step 3: append QS trees. Lines matching "treebank>" are dropped (presumably
-- both the opening and the closing tag of this batch -- TODO confirm).
gt -depth=4 -cat=QS | tb -xml | grep -v "treebank>" | af min.xml
|
||||||
|
-- Step 4: append trees of the form (UttImpSg ? ?), filtered the same way as step 3.
gt -depth=3 (UttImpSg ? ?) | tb -xml | grep -v "treebank>" | af min.xml
|
||||||
|
-- Step 5: append NP trees. Only lines matching "<treebank" are dropped
-- (presumably the opening tag, so this batch's closing tag ends the file -- TODO confirm).
gt -depth=4 -cat=NP | tb -xml | grep -v "<treebank" | af min.xml
|
||||||
|
-- Step 6: NOTE(review): 'e' presumably empties the shell state before the
-- multilingual grammar is loaded -- confirm against the GF shell help.
e
|
||||||
|
-- Step 7: load the multilingual grammar file (the -nocf option is not
-- explained here; presumably it skips building context-free parsers).
i -nocf langs.gfcm
|
||||||
|
-- Step 8: create a new treebank (langs.xml) from the old one (min.xml):
-- read it, extract its trees, and write them out again as XML.
rf min.xml | tb -trees | tb -xml | wf langs.xml
|
||||||
@@ -245,11 +245,19 @@ txtHelpFile =
|
|||||||
"\n rf tb.xml | tb -c -- compare-test treebank from file" ++
|
"\n rf tb.xml | tb -c -- compare-test treebank from file" ++
|
||||||
"\n rf old.xml | tb -trees | tb -xml -- create new treebank from old" ++
|
"\n rf old.xml | tb -trees | tb -xml -- create new treebank from old" ++
|
||||||
"\n" ++
|
"\n" ++
|
||||||
"\nlt, lookup_treebank: lt String" ++
|
"\nut, use_treebank: ut String" ++
|
||||||
"\n Lookup a string in a treebank and return the resulting trees." ++
|
"\n Lookup a string in a treebank and return the resulting trees." ++
|
||||||
"\n Use 'tb' to create a treebank and 'i -treebank' to read it in memory." ++
|
"\n Use 'tb' to create a treebank and 'i -treebank' to read one from" ++
|
||||||
"\n flag:" ++
|
"\n a file." ++
|
||||||
"\n -treebank use this treebank (instead of the latest introduced one) TODO" ++
|
"\n options:" ++
|
||||||
|
"\n -assocs show all string-trees associations in the treebank" ++
|
||||||
|
"\n -strings show all strings in the treebank" ++
|
||||||
|
"\n -raw return result as string, without typechecking it" ++
|
||||||
|
"\n flags:" ++
|
||||||
|
"\n -treebank use this treebank (instead of the latest introduced one)" ++
|
||||||
|
"\n examples:" ++
|
||||||
|
"\n ut \"He adds this to that\" | l -multi -- use treebank lookup as parser in translation" ++
|
||||||
|
"\n ut -assocs | grep \"ComplV2\" -- show all associations with ComplV2" ++
|
||||||
"\n" ++
|
"\n" ++
|
||||||
"\ntt, test_tokenizer: tt String" ++
|
"\ntt, test_tokenizer: tt String" ++
|
||||||
"\n Show the token list sent to the parser when String is parsed." ++
|
"\n Show the token list sent to the parser when String is parsed." ++
|
||||||
|
|||||||
@@ -114,7 +114,7 @@ pCommand ws = case ws of
|
|||||||
"cc" : s -> aUnit $ CComputeConcrete $ unwords s
|
"cc" : s -> aUnit $ CComputeConcrete $ unwords s
|
||||||
"so" : s -> aUnit $ CShowOpers $ unwords s
|
"so" : s -> aUnit $ CShowOpers $ unwords s
|
||||||
"tb" : [] -> aUnit CTreeBank
|
"tb" : [] -> aUnit CTreeBank
|
||||||
"lt" : s -> aString CLookupTreebank s
|
"ut" : s -> aString CLookupTreebank s
|
||||||
|
|
||||||
"tq" : i:o:[] -> aUnit (CTranslationQuiz (language i) (language o))
|
"tq" : i:o:[] -> aUnit (CTranslationQuiz (language i) (language o))
|
||||||
"tl":i:o:[] -> aUnit (CTranslationList (language i) (language o))
|
"tl":i:o:[] -> aUnit (CTranslationList (language i) (language o))
|
||||||
|
|||||||
16
src/HelpFile
16
src/HelpFile
@@ -216,11 +216,19 @@ tb, tree_bank: tb
|
|||||||
rf tb.xml | tb -c -- compare-test treebank from file
|
rf tb.xml | tb -c -- compare-test treebank from file
|
||||||
rf old.xml | tb -trees | tb -xml -- create new treebank from old
|
rf old.xml | tb -trees | tb -xml -- create new treebank from old
|
||||||
|
|
||||||
lt, lookup_treebank: lt String
|
ut, use_treebank: ut String
|
||||||
Look up a string in a treebank and return the resulting trees.
|
Look up a string in a treebank and return the resulting trees.
|
||||||
Use 'tb' to create a treebank and 'i -treebank' to read it in memory.
|
Use 'tb' to create a treebank and 'i -treebank' to read one from
|
||||||
flag:
|
a file.
|
||||||
-treebank use this treebank (instead of the latest introduced one) TODO
|
options:
|
||||||
|
-assocs show all string-trees associations in the treebank
|
||||||
|
-strings show all strings in the treebank
|
||||||
|
-raw return result as string, without typechecking it
|
||||||
|
flags:
|
||||||
|
-treebank use this treebank (instead of the latest introduced one)
|
||||||
|
examples:
|
||||||
|
ut "He adds this to that" | l -multi -- use treebank lookup as parser in translation
|
||||||
|
ut -assocs | grep "ComplV2" -- show all associations with ComplV2
|
||||||
|
|
||||||
tt, test_tokenizer: tt String
|
tt, test_tokenizer: tt String
|
||||||
Show the token list sent to the parser when String is parsed.
|
Show the token list sent to the parser when String is parsed.
|
||||||
|
|||||||
Reference in New Issue
Block a user