forked from GitHub/gf-core
checked top-1000 BNC senses in Swe, with some split senses added to Dictionary and DictionaryEng. Wrote bnc-dict-log.txt to describe the procedure, which should now be reproducible for other languages.
This commit is contained in:
@@ -22,6 +22,7 @@ concrete IrregSwe of IrregSweAbs = CatSwe ** open ParadigmsSwe in {
|
||||
avskriva_V = irregV "avskriva" "avskrev" "avskrivit" ;
|
||||
avstiga_V = irregV "avstiga" "avsteg" "avstigit" ;
|
||||
bära_V = irregV "bära" "bar" "burit" ;
|
||||
bedja_V = irregV "be" "bad" "bett" ;
|
||||
bedraga_V = irregV "bedraga" "bedrog" "bedragit" ;
|
||||
bedriva_V = irregV "bedriva" "bedrev" "bedrivit" ;
|
||||
befinna_V = irregV "befinna" "befann" "befunnit" ;
|
||||
|
||||
@@ -17,6 +17,7 @@ abstract IrregSweAbs = Cat ** {
|
||||
avskriva_V : V ;
|
||||
avstiga_V : V ;
|
||||
bära_V : V ;
|
||||
bedja_V : V ;
|
||||
bedraga_V : V ;
|
||||
bedriva_V : V ;
|
||||
befinna_V : V ;
|
||||
|
||||
@@ -3121,7 +3121,9 @@ fun applied_A : A ;
|
||||
fun applique_N : N ;
|
||||
fun applique_V2 : V2 ;
|
||||
fun apply_V : V ;
|
||||
fun apply_V2 : V2 ;
|
||||
fun apply_V2 : V2 ; ---
|
||||
fun apply_1_V2 : V2 ; -- | we apply the newest methods
|
||||
fun apply_2_V2 : V2 ; -- | we apply for a job
|
||||
fun apply_V2V : V2V ;
|
||||
fun apply_VV : VV ;
|
||||
fun appoint_V2 : V2 ;
|
||||
@@ -3443,7 +3445,9 @@ fun arity_N : N ;
|
||||
fun arizona_PN : PN ;
|
||||
fun ark_N : N ;
|
||||
fun arkansas_PN : PN ;
|
||||
fun arm_N : N ;
|
||||
fun arm_N : N ; ---
|
||||
fun arm_1_N : N ; -- | arms and legs
|
||||
fun arm_2_N : N ; -- | a farewell to arms
|
||||
fun arm_V : V ;
|
||||
fun arm_V2 : V2 ;
|
||||
fun arm_hole_N : N ;
|
||||
@@ -12428,7 +12432,8 @@ fun condescendingly_Adv : Adv ;
|
||||
fun condescension_N : N ;
|
||||
fun condign_A : A ;
|
||||
fun condiment_N : N ;
|
||||
fun condition_N : N ;
|
||||
fun condition_1_N : N ; -- | there is one condition
|
||||
fun condition_2_N : N ; -- | he is in a bad condition
|
||||
fun condition_V2 : V2 ;
|
||||
fun conditional_A : A ;
|
||||
fun conditionality_N : N ;
|
||||
@@ -17813,7 +17818,8 @@ fun draughtsman_N : N ;
|
||||
fun draughty_A : A ;
|
||||
fun draw_N : N ;
|
||||
fun draw_V : V ;
|
||||
fun draw_V2 : V2 ;
|
||||
fun draw_1_V2 : V2 ; -- | draw the carriage
|
||||
fun draw_2_V2 : V2 ; -- | draw a picture
|
||||
fun draw_V2V : V2V ;
|
||||
fun draw_back_V2 : V2 ;
|
||||
fun draw_down_V2 : V2 ;
|
||||
@@ -23608,8 +23614,9 @@ fun gamboge_N : N ;
|
||||
fun gambol_N : N ;
|
||||
fun gambol_V : V ;
|
||||
fun gambrel_N : N ;
|
||||
fun game_1_N : N ; -- | play a game
|
||||
fun game_2_N : N ; -- | eat game
|
||||
fun game_1_N : N ; -- | card game
|
||||
fun game_2_N : N ; -- | children's game
|
||||
fun game_3_N : N ; -- | eat game
|
||||
fun game_A : A ;
|
||||
fun game_N : N ;
|
||||
fun game_V : V ;
|
||||
@@ -28847,7 +28854,8 @@ fun imputation_N : N ;
|
||||
fun impute_V2 : V2 ;
|
||||
fun imputrescible_A : A ;
|
||||
fun in_A : A ;
|
||||
fun in_Adv : Adv ;
|
||||
fun in_1_Adv : Adv ; -- | he is in
|
||||
fun in_2_Adv : Adv ; -- | he goes in
|
||||
fun in_N : N ;
|
||||
fun in_Prep : Prep ;
|
||||
fun in_accordance_with_Prep : Prep ;
|
||||
@@ -32197,7 +32205,8 @@ fun lassie_N : N ;
|
||||
fun lassitude_N : N ;
|
||||
fun lasso_N : N ;
|
||||
fun lasso_V2 : V2 ;
|
||||
fun last_A : A ;
|
||||
fun last_1_A : A ; -- | the last supper
|
||||
fun last_2_A : A ; -- | last week
|
||||
fun last_Adv : Adv ;
|
||||
fun last_N : N ;
|
||||
fun last_V : V ;
|
||||
@@ -32895,8 +32904,8 @@ fun lidar_N : N ;
|
||||
fun lidded_A : A ;
|
||||
fun lidless_A : A ;
|
||||
fun lido_N : N ;
|
||||
fun lie_1_V : V ;
|
||||
fun lie_2_V : V ;
|
||||
fun lie_1_V : V ; -- | lie on the ground
|
||||
fun lie_2_V : V ; -- | tell a lie
|
||||
fun lie_N : N ;
|
||||
fun lie_VS : VS ;
|
||||
fun lie_abed_N : N ;
|
||||
@@ -34834,7 +34843,8 @@ fun marke_up_V : V ;
|
||||
fun marked_A : A ;
|
||||
fun markedly_Adv : Adv ;
|
||||
fun marker_N : N ;
|
||||
fun market_N : N ;
|
||||
fun market_1_N : N ; -- | fish market (place)
|
||||
fun market_2_N : N ; -- | stock market (abstract)
|
||||
fun market_V : V ;
|
||||
fun market_V2 : V2 ;
|
||||
fun market_cross_N : N ;
|
||||
@@ -35112,7 +35122,8 @@ fun matsyendra_N : N ;
|
||||
fun matt_A : A ;
|
||||
fun matt_PN : PN ;
|
||||
fun matte_N : N ;
|
||||
fun matter_N : N ;
|
||||
fun matter_1_N : N ; -- | matter and form
|
||||
fun matter_2_N : N ; -- | what is the matter
|
||||
fun matter_V : V ;
|
||||
fun matter_of_course_A : A ;
|
||||
fun matter_of_fact_A : A ;
|
||||
@@ -49347,7 +49358,8 @@ fun rook_N : N ;
|
||||
fun rook_V2 : V2 ;
|
||||
fun rookery_N : N ;
|
||||
fun rookie_N : N ;
|
||||
fun room_N : N ;
|
||||
fun room_1_N : N ; -- | five rooms
|
||||
fun room_2_N : N ; -- | there is room for five
|
||||
fun room_V : V ;
|
||||
fun room_in_V2 : V2 ;
|
||||
fun room_mate_N : N ;
|
||||
@@ -58339,7 +58351,8 @@ fun tell_V2 : V2 ;
|
||||
fun tell_V2Q : V2Q ;
|
||||
fun tell_V2S : V2S ;
|
||||
fun tell_V2V : V2V ;
|
||||
fun tell_V3 : V3 ;
|
||||
fun tell_1_V3 : V3 ; -- | tell him a story
|
||||
fun tell_2_V3 : V3 ; -- | tell heaven from hell
|
||||
fun tell_VS : VS ;
|
||||
fun tell_VV : VV ;
|
||||
fun tell_apart_V2 : V2 ;
|
||||
@@ -64358,7 +64371,8 @@ fun waster_N : N ;
|
||||
fun wastrel_N : N ;
|
||||
fun watch_N : N ;
|
||||
fun watch_V : V ;
|
||||
fun watch_V2 : V2 ;
|
||||
fun watch_1_V2 : V2 ; -- | watch the tv
|
||||
fun watch_2_V2 : V2 ; -- | watch the bank
|
||||
fun watch_V2V : V2V ;
|
||||
fun watch_VS : VS ;
|
||||
fun watch_chain_N : N ;
|
||||
|
||||
@@ -3125,6 +3125,8 @@ lin applique_N = mkN "appliqué" ;
|
||||
lin applique_V2 = mkV2 (mkV "appliqué" "appliqués" "appliquéed" "appliquéed" "appliquéing");
|
||||
lin apply_V = mkV "apply" "applies" "applied" "applied" "applying";
|
||||
lin apply_V2 = mkV2 (mkV "apply" "applies" "applied" "applied" "applying");
|
||||
lin apply_1_V2 = mkV2 (mkV "apply" "applies" "applied" "applied" "applying");
|
||||
lin apply_2_V2 = mkV2 (mkV "apply" "applies" "applied" "applied" "applying") for_Prep ;
|
||||
lin apply_V2V = mkV2V (mkV "apply" "applies" "applied" "applied" "applying") noPrep to_Prep ;
|
||||
lin apply_VV = mkVV (mkV "apply" "applies" "applied" "applied" "applying");
|
||||
lin appoint_V2 = mkV2 (mkV "appoint" "appoints" "appointed" "appointed" "appointing");
|
||||
@@ -3447,6 +3449,8 @@ lin arizona_PN = mkPN "Arizona";
|
||||
lin ark_N = mkN "ark" "arks";
|
||||
lin arkansas_PN = mkPN "Arkansas";
|
||||
lin arm_N = mkN "arm" "arms";
|
||||
lin arm_1_N = mkN "arm" "arms";
|
||||
lin arm_2_N = mkN "arm" "arms";
|
||||
lin arm_V = mkV "arm" "arms" "armed" "armed" "arming";
|
||||
lin arm_V2 = mkV2 (mkV "arm" "arms" "armed" "armed" "arming");
|
||||
lin arm_hole_N = mkN "arm-hole" "arm-holes";
|
||||
@@ -12431,6 +12435,8 @@ lin condescension_N = mkN "condescension" "condescensions";
|
||||
lin condign_A = compoundA (mkA "condign");
|
||||
lin condiment_N = mkN "condiment" "condiments";
|
||||
lin condition_N = mkN "condition" "conditions";
|
||||
lin condition_1_N = mkN "condition" "conditions";
|
||||
lin condition_2_N = mkN "condition" "conditions";
|
||||
lin condition_V2 = mkV2 (mkV "condition" "conditions" "conditioned" "conditioned" "conditioning");
|
||||
lin conditional_A = compoundA (mkA "conditional");
|
||||
lin conditionality_N = mkN "conditionality" ;
|
||||
@@ -17813,6 +17819,8 @@ lin draughty_A = mkA "draughty" "draughtier";
|
||||
lin draw_N = mkN "draw" "draws";
|
||||
lin draw_V = IrregEng.draw_V;
|
||||
lin draw_V2 = mkV2 (IrregEng.draw_V);
|
||||
lin draw_1_V2 = mkV2 (IrregEng.draw_V);
|
||||
lin draw_2_V2 = mkV2 (IrregEng.draw_V);
|
||||
lin draw_V2V = mkV2V (IrregEng.draw_V) noPrep to_Prep ;
|
||||
lin draw_back_V2 = mkV2 (partV IrregEng.draw_V "back");
|
||||
lin draw_down_V2 = mkV2 (partV IrregEng.draw_V "down");
|
||||
@@ -23608,6 +23616,7 @@ lin gambol_V = mkV "gambol" "gambols" "gambolled" "gambolled" "gambolling";
|
||||
lin gambrel_N = mkN "gambrel" ;
|
||||
lin game_1_N = mkN "game" ;
|
||||
lin game_2_N = mkN "game" ;
|
||||
lin game_3_N = mkN "game" ;
|
||||
lin game_A = compoundA (mkA "game");
|
||||
lin game_N = mkN "game" "games";
|
||||
lin game_V = mkV "game" "games" "gamed" "gamed" "gaming";
|
||||
@@ -28846,6 +28855,8 @@ lin impute_V2 = mkV2 (mkV "impute" "imputes" "imputed" "imputed" "imputing");
|
||||
lin imputrescible_A = mkA "imputrescible" ;
|
||||
lin in_A = mkA "in" ;
|
||||
lin in_Adv = mkAdv "in";
|
||||
lin in_1_Adv = mkAdv "in";
|
||||
lin in_2_Adv = mkAdv "in";
|
||||
lin in_N = mkN "in" "ins";
|
||||
lin in_Prep = mkPrep "in";
|
||||
lin in_accordance_with_Prep = mkPrep "in accordance with";
|
||||
@@ -32194,6 +32205,8 @@ lin lassitude_N = mkN "lassitude" ;
|
||||
lin lasso_N = mkN "lasso" "lassos";
|
||||
lin lasso_V2 = mkV2 (mkV "lasso" "lassos" "lassoed" "lassoed" "lassoing");
|
||||
lin last_A = irregAdv (mkA "last") "last";
|
||||
lin last_1_A = irregAdv (mkA "last") "last";
|
||||
lin last_2_A = irregAdv (mkA "last") "last";
|
||||
lin last_Adv = mkAdv "last" ;
|
||||
lin last_N = mkN "last" ;
|
||||
lin last_V = mkV "last" "lasts" "lasted" "lasted" "lasting";
|
||||
@@ -34830,6 +34843,8 @@ lin marked_A = mkA "marked" ;
|
||||
lin markedly_Adv = mkAdv "markedly" ;
|
||||
lin marker_N = mkN "marker" "markers";
|
||||
lin market_N = mkN "market" "markets";
|
||||
lin market_1_N = mkN "market" "markets";
|
||||
lin market_2_N = mkN "market" "markets";
|
||||
lin market_V = mkV "market" "markets" "marketed" "marketed" "marketing";
|
||||
lin market_V2 = mkV2 (mkV "market" "markets" "marketed" "marketed" "marketing");
|
||||
lin market_cross_N = mkN "market-cross" "market-crosses";
|
||||
@@ -35108,6 +35123,8 @@ lin matt_A = compoundA (mkA "matt");
|
||||
lin matt_PN = mkPN "Matt";
|
||||
lin matte_N = mkN "matte" ;
|
||||
lin matter_N = mkN "matter" "matters";
|
||||
lin matter_1_N = mkN "matter" "matters";
|
||||
lin matter_2_N = mkN "matter" "matters";
|
||||
lin matter_V = mkV "matter" "matters" "mattered" "mattered" "mattering";
|
||||
lin matter_of_course_A = compoundA (mkA "matter-of-course");
|
||||
lin matter_of_fact_A = compoundA (mkA "matter-of-fact");
|
||||
@@ -49342,6 +49359,8 @@ lin rook_V2 = mkV2 (mkV "rook" "rooks" "rooked" "rooked" "rooking");
|
||||
lin rookery_N = mkN "rookery" "rookeries";
|
||||
lin rookie_N = mkN "rookie" "rookies";
|
||||
lin room_N = mkN "room" "rooms";
|
||||
lin room_1_N = mkN "room" "rooms";
|
||||
lin room_2_N = mkN "room" "rooms";
|
||||
lin room_V = mkV "room" "rooms" "roomed" "roomed" "rooming";
|
||||
lin room_in_V2 = mkV2 (partV (mkV "room") "in");
|
||||
lin room_mate_N = mkN "room-mate" "room-mates";
|
||||
@@ -58335,6 +58354,8 @@ lin tell_V2Q = mkV2Q (IrregEng.tell_V) noPrep;
|
||||
lin tell_V2S = mkV2S (IrregEng.tell_V) noPrep;
|
||||
lin tell_V2V = mkV2V (IrregEng.tell_V) noPrep to_Prep;
|
||||
lin tell_V3 = mkV3 (IrregEng.tell_V) noPrep noPrep;
|
||||
lin tell_1_V3 = mkV3 (IrregEng.tell_V) noPrep noPrep;
|
||||
lin tell_2_V3 = mkV3 (IrregEng.tell_V) noPrep noPrep;
|
||||
lin tell_VS = mkVS (IrregEng.tell_V);
|
||||
lin tell_VV = mkVV (IrregEng.tell_V);
|
||||
lin tell_apart_V2 = mkV2 (partV IrregEng.tell_V "apart");
|
||||
@@ -64353,6 +64374,8 @@ lin wastrel_N = mkN "wastrel" "wastrels";
|
||||
lin watch_N = mkN "watch" "watches";
|
||||
lin watch_V = mkV "watch" "watches" "watched" "watched" "watching";
|
||||
lin watch_V2 = mkV2 (mkV "watch" "watches" "watched" "watched" "watching");
|
||||
lin watch_1_V2 = mkV2 (mkV "watch" "watches" "watched" "watched" "watching");
|
||||
lin watch_2_V2 = mkV2 (mkV "watch" "watches" "watched" "watched" "watching");
|
||||
lin watch_V2V = mkV2V (mkV "watch" "watches" "watched" "watched" "watching") noPrep to_Prep ;
|
||||
lin watch_VS = mkVS (mkV "watch" "watches" "watched" "watched" "watching");
|
||||
lin watch_chain_N = mkN "watch-chain" "watch-chains";
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
60
lib/src/translator/bnc-dict-log.txt
Normal file
60
lib/src/translator/bnc-dict-log.txt
Normal file
@@ -0,0 +1,60 @@
|
||||
1. Create a check list for Swe
|
||||
|
||||
do
|
||||
bnc <- readFile "bnc-to-check.txt" >>= return . words -- list of BNC funs
|
||||
dict <- readFile "DictionarySwe.gf" >>= return . map words . lines -- current Swe lexicon
|
||||
let dictmap = Data.Map.fromList [(f,unwords ws) | "lin":f:"=":ws <- dict]
|
||||
let bncdict = [(f,maybe "variants{} ;" id $ Data.Map.lookup f dictmap) | f <- bnc] -- current Swe for BNC
|
||||
writeFile "bncswe.txt" $ unlines [unwords ("lin":f:"=":[ws]) | (f,ws) <- bncdict] -- print inspectable file
|
||||
|
||||
|
||||
2. Inspect the check list
|
||||
|
||||
Went through the first one thousand entries:
|
||||
- corrected everything
|
||||
- removed trailing comments from corrected entries
|
||||
- split senses
|
||||
- added a -- | comment for disambiguating new senses
|
||||
|
||||
Move the checked words to correctswe.txt
|
||||
|
||||
|
||||
3. Apply split senses
|
||||
|
||||
grep "\-\- |" correctswe.txt | sort
|
||||
|
||||
Copy split senses to bnc-to-check.txt
|
||||
- *but don't remove the unsplit senses* because they are needed to find words from other languages
|
||||
|
||||
Copy split senses to Dictionary.gf, together with the -- | comments
|
||||
|
||||
Make copies for split senses in DictionaryEng.gf
|
||||
|
||||
Verify the result by compiling DictionaryEng.gf
|
||||
|
||||
|
||||
4. Extend the Swe lexicon
|
||||
|
||||
do
|
||||
old <- readFile "DictionarySwe.gf" >>= return . map words . lines -- read old lexicon
|
||||
new <- readFile "correctswe.txt" >>= return . map words . lines -- read corrected and new words
|
||||
let oldmap = Data.Map.fromList [(f,unwords ws) | "lin":f:"=":ws <- old]
|
||||
let newlist = [(f,unwords (takeWhile (/= "--") ws)) | "lin":f:"=":ws <- new] -- drop comments from corrected words
|
||||
let newmap = foldr (uncurry Data.Map.insert) oldmap newlist -- insert corrected words
|
||||
writeFile "newswe.txt" $ unlines [unwords ("lin":f:"=":[ws]) | (f,ws) <- Data.Map.assocs newmap] -- print lin rules
|
||||
|
||||
Replace the body of DictionarySwe.gf by newswe.txt
|
||||
|
||||
Compile DictionarySwe.gf
|
||||
|
||||
|
||||
5. Save the rest for later
|
||||
|
||||
Remove the corrected words from bncswe.txt.
|
||||
Or make a note of the last word that has already been checked.
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -90,6 +90,8 @@ more_Adv
|
||||
about_Prep
|
||||
now_Adv
|
||||
last_A
|
||||
last_1_A
|
||||
last_2_A
|
||||
other_A
|
||||
give_V3
|
||||
give_V2
|
||||
@@ -140,6 +142,8 @@ thing_N
|
||||
tell_VV
|
||||
tell_VS
|
||||
tell_V3
|
||||
tell_1_V3
|
||||
tell_2_V3
|
||||
tell_V2V
|
||||
tell_V2S
|
||||
tell_V2Q
|
||||
@@ -183,7 +187,6 @@ leave_V2
|
||||
leave_V
|
||||
life_N
|
||||
great_A
|
||||
where_Adv
|
||||
case_N
|
||||
woman_N
|
||||
over_Adv
|
||||
@@ -263,6 +266,8 @@ about_Adv
|
||||
something_NP
|
||||
school_N
|
||||
in_Adv
|
||||
in_1_Adv
|
||||
in_2_Adv
|
||||
small_A
|
||||
place_N
|
||||
before_Prep
|
||||
@@ -406,6 +411,8 @@ hear_V2V
|
||||
hear_V2
|
||||
hear_V
|
||||
room_N
|
||||
room_1_N
|
||||
room_2_N
|
||||
whether_Subj
|
||||
water_N
|
||||
form_N
|
||||
@@ -498,6 +505,8 @@ sit_VA
|
||||
sit_V2
|
||||
sit_V
|
||||
market_N
|
||||
market_1_N
|
||||
market_2_N
|
||||
appear_VV
|
||||
appear_VS
|
||||
appear_VA
|
||||
@@ -614,6 +623,8 @@ low_A
|
||||
cost_N
|
||||
little_Det
|
||||
matter_N
|
||||
matter_1_N
|
||||
matter_2_N
|
||||
community_N
|
||||
remain_VV
|
||||
remain_VS
|
||||
@@ -685,6 +696,8 @@ spend_V2
|
||||
spend_V
|
||||
force_N
|
||||
condition_N
|
||||
condition_1_N
|
||||
condition_2_N
|
||||
paper_N
|
||||
off_Prep
|
||||
major_A
|
||||
@@ -756,6 +769,8 @@ management_N
|
||||
morning_N
|
||||
draw_V2V
|
||||
draw_V2
|
||||
draw_1_V2
|
||||
draw_2_V2
|
||||
draw_V
|
||||
hope_VV
|
||||
hope_VS
|
||||
@@ -782,6 +797,7 @@ foot_N
|
||||
clear_A
|
||||
boy_N
|
||||
game_N
|
||||
game_3_N
|
||||
game_2_N
|
||||
game_1_N
|
||||
food_N
|
||||
@@ -826,6 +842,8 @@ cause_VS
|
||||
cause_V2V
|
||||
cause_V2
|
||||
arm_N
|
||||
arm_1_N
|
||||
arm_2_N
|
||||
history_N
|
||||
parent_N
|
||||
land_N
|
||||
@@ -833,6 +851,8 @@ trade_N
|
||||
watch_VS
|
||||
watch_V2V
|
||||
watch_V2
|
||||
watch_1_V2
|
||||
watch_2_V2
|
||||
watch_V
|
||||
white_A
|
||||
situation_N
|
||||
@@ -882,6 +902,8 @@ cover_V2
|
||||
apply_VV
|
||||
apply_V2V
|
||||
apply_V2
|
||||
apply_1_V2
|
||||
apply_2_V2
|
||||
apply_V
|
||||
project_N
|
||||
raise_V2V
|
||||
|
||||
6825
lib/src/translator/bncswe.txt
Normal file
6825
lib/src/translator/bncswe.txt
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user