1
0
forked from GitHub/gf-core

Checked the top-1000 BNC senses in Swe, with some split senses added to Dictionary and DictionaryEng. Wrote bnc-dict-log.txt to describe the procedure, which should now be reproducible for other languages.

This commit is contained in:
aarne
2014-03-30 16:28:40 +00:00
parent e96d222c41
commit 154a65cc3e
8 changed files with 7404 additions and 424 deletions

View File

@@ -22,6 +22,7 @@ concrete IrregSwe of IrregSweAbs = CatSwe ** open ParadigmsSwe in {
avskriva_V = irregV "avskriva" "avskrev" "avskrivit" ;
avstiga_V = irregV "avstiga" "avsteg" "avstigit" ;
bära_V = irregV "bära" "bar" "burit" ;
bedja_V = irregV "be" "bad" "bett" ;
bedraga_V = irregV "bedraga" "bedrog" "bedragit" ;
bedriva_V = irregV "bedriva" "bedrev" "bedrivit" ;
befinna_V = irregV "befinna" "befann" "befunnit" ;

View File

@@ -17,6 +17,7 @@ abstract IrregSweAbs = Cat ** {
avskriva_V : V ;
avstiga_V : V ;
bära_V : V ;
bedja_V : V ;
bedraga_V : V ;
bedriva_V : V ;
befinna_V : V ;

View File

@@ -3121,7 +3121,9 @@ fun applied_A : A ;
fun applique_N : N ;
fun applique_V2 : V2 ;
fun apply_V : V ;
fun apply_V2 : V2 ;
fun apply_V2 : V2 ; ---
fun apply_1_V2 : V2 ; -- | we apply the newest methods
fun apply_2_V2 : V2 ; -- | we apply for a job
fun apply_V2V : V2V ;
fun apply_VV : VV ;
fun appoint_V2 : V2 ;
@@ -3443,7 +3445,9 @@ fun arity_N : N ;
fun arizona_PN : PN ;
fun ark_N : N ;
fun arkansas_PN : PN ;
fun arm_N : N ;
fun arm_N : N ; ---
fun arm_1_N : N ; -- | arms and legs
fun arm_2_N : N ; -- | a farewell to arms
fun arm_V : V ;
fun arm_V2 : V2 ;
fun arm_hole_N : N ;
@@ -12428,7 +12432,8 @@ fun condescendingly_Adv : Adv ;
fun condescension_N : N ;
fun condign_A : A ;
fun condiment_N : N ;
fun condition_N : N ;
fun condition_1_N : N ; -- | there is one condition
fun condition_2_N : N ; -- | he is in a bad condition
fun condition_V2 : V2 ;
fun conditional_A : A ;
fun conditionality_N : N ;
@@ -17813,7 +17818,8 @@ fun draughtsman_N : N ;
fun draughty_A : A ;
fun draw_N : N ;
fun draw_V : V ;
fun draw_V2 : V2 ;
fun draw_1_V2 : V2 ; -- | draw the carriage
fun draw_2_V2 : V2 ; -- | draw a picture
fun draw_V2V : V2V ;
fun draw_back_V2 : V2 ;
fun draw_down_V2 : V2 ;
@@ -23608,8 +23614,9 @@ fun gamboge_N : N ;
fun gambol_N : N ;
fun gambol_V : V ;
fun gambrel_N : N ;
fun game_1_N : N ; -- | play a game
fun game_2_N : N ; -- | eat game
fun game_1_N : N ; -- | card game
fun game_2_N : N ; -- | children's game
fun game_3_N : N ; -- | eat game
fun game_A : A ;
fun game_N : N ;
fun game_V : V ;
@@ -28847,7 +28854,8 @@ fun imputation_N : N ;
fun impute_V2 : V2 ;
fun imputrescible_A : A ;
fun in_A : A ;
fun in_Adv : Adv ;
fun in_1_Adv : Adv ; -- | he is in
fun in_2_Adv : Adv ; -- | he goes in
fun in_N : N ;
fun in_Prep : Prep ;
fun in_accordance_with_Prep : Prep ;
@@ -32197,7 +32205,8 @@ fun lassie_N : N ;
fun lassitude_N : N ;
fun lasso_N : N ;
fun lasso_V2 : V2 ;
fun last_A : A ;
fun last_1_A : A ; -- | the last supper
fun last_2_A : A ; -- | last week
fun last_Adv : Adv ;
fun last_N : N ;
fun last_V : V ;
@@ -32895,8 +32904,8 @@ fun lidar_N : N ;
fun lidded_A : A ;
fun lidless_A : A ;
fun lido_N : N ;
fun lie_1_V : V ;
fun lie_2_V : V ;
fun lie_1_V : V ; -- | lie on the ground
fun lie_2_V : V ; -- | tell a lie
fun lie_N : N ;
fun lie_VS : VS ;
fun lie_abed_N : N ;
@@ -34834,7 +34843,8 @@ fun marke_up_V : V ;
fun marked_A : A ;
fun markedly_Adv : Adv ;
fun marker_N : N ;
fun market_N : N ;
fun market_1_N : N ; -- | fish market (place)
fun market_2_N : N ; -- | stock market (abstract)
fun market_V : V ;
fun market_V2 : V2 ;
fun market_cross_N : N ;
@@ -35112,7 +35122,8 @@ fun matsyendra_N : N ;
fun matt_A : A ;
fun matt_PN : PN ;
fun matte_N : N ;
fun matter_N : N ;
fun matter_1_N : N ; -- | matter and form
fun matter_2_N : N ; -- | what is the matter
fun matter_V : V ;
fun matter_of_course_A : A ;
fun matter_of_fact_A : A ;
@@ -49347,7 +49358,8 @@ fun rook_N : N ;
fun rook_V2 : V2 ;
fun rookery_N : N ;
fun rookie_N : N ;
fun room_N : N ;
fun room_1_N : N ; -- | five rooms
fun room_2_N : N ; -- | there is room for five
fun room_V : V ;
fun room_in_V2 : V2 ;
fun room_mate_N : N ;
@@ -58339,7 +58351,8 @@ fun tell_V2 : V2 ;
fun tell_V2Q : V2Q ;
fun tell_V2S : V2S ;
fun tell_V2V : V2V ;
fun tell_V3 : V3 ;
fun tell_1_V3 : V3 ; -- | tell him a story
fun tell_2_V3 : V3 ; -- | tell heaven from hell
fun tell_VS : VS ;
fun tell_VV : VV ;
fun tell_apart_V2 : V2 ;
@@ -64358,7 +64371,8 @@ fun waster_N : N ;
fun wastrel_N : N ;
fun watch_N : N ;
fun watch_V : V ;
fun watch_V2 : V2 ;
fun watch_1_V2 : V2 ; -- | watch the tv
fun watch_2_V2 : V2 ; -- | watch the bank
fun watch_V2V : V2V ;
fun watch_VS : VS ;
fun watch_chain_N : N ;

View File

@@ -3125,6 +3125,8 @@ lin applique_N = mkN "appliqué" ;
lin applique_V2 = mkV2 (mkV "appliqué" "appliqués" "appliquéed" "appliquéed" "appliquéing");
lin apply_V = mkV "apply" "applies" "applied" "applied" "applying";
lin apply_V2 = mkV2 (mkV "apply" "applies" "applied" "applied" "applying");
lin apply_1_V2 = mkV2 (mkV "apply" "applies" "applied" "applied" "applying");
lin apply_2_V2 = mkV2 (mkV "apply" "applies" "applied" "applied" "applying") for_Prep ;
lin apply_V2V = mkV2V (mkV "apply" "applies" "applied" "applied" "applying") noPrep to_Prep ;
lin apply_VV = mkVV (mkV "apply" "applies" "applied" "applied" "applying");
lin appoint_V2 = mkV2 (mkV "appoint" "appoints" "appointed" "appointed" "appointing");
@@ -3447,6 +3449,8 @@ lin arizona_PN = mkPN "Arizona";
lin ark_N = mkN "ark" "arks";
lin arkansas_PN = mkPN "Arkansas";
lin arm_N = mkN "arm" "arms";
lin arm_1_N = mkN "arm" "arms";
lin arm_2_N = mkN "arm" "arms";
lin arm_V = mkV "arm" "arms" "armed" "armed" "arming";
lin arm_V2 = mkV2 (mkV "arm" "arms" "armed" "armed" "arming");
lin arm_hole_N = mkN "arm-hole" "arm-holes";
@@ -12431,6 +12435,8 @@ lin condescension_N = mkN "condescension" "condescensions";
lin condign_A = compoundA (mkA "condign");
lin condiment_N = mkN "condiment" "condiments";
lin condition_N = mkN "condition" "conditions";
lin condition_1_N = mkN "condition" "conditions";
lin condition_2_N = mkN "condition" "conditions";
lin condition_V2 = mkV2 (mkV "condition" "conditions" "conditioned" "conditioned" "conditioning");
lin conditional_A = compoundA (mkA "conditional");
lin conditionality_N = mkN "conditionality" ;
@@ -17813,6 +17819,8 @@ lin draughty_A = mkA "draughty" "draughtier";
lin draw_N = mkN "draw" "draws";
lin draw_V = IrregEng.draw_V;
lin draw_V2 = mkV2 (IrregEng.draw_V);
lin draw_1_V2 = mkV2 (IrregEng.draw_V);
lin draw_2_V2 = mkV2 (IrregEng.draw_V);
lin draw_V2V = mkV2V (IrregEng.draw_V) noPrep to_Prep ;
lin draw_back_V2 = mkV2 (partV IrregEng.draw_V "back");
lin draw_down_V2 = mkV2 (partV IrregEng.draw_V "down");
@@ -23608,6 +23616,7 @@ lin gambol_V = mkV "gambol" "gambols" "gambolled" "gambolled" "gambolling";
lin gambrel_N = mkN "gambrel" ;
lin game_1_N = mkN "game" ;
lin game_2_N = mkN "game" ;
lin game_3_N = mkN "game" ;
lin game_A = compoundA (mkA "game");
lin game_N = mkN "game" "games";
lin game_V = mkV "game" "games" "gamed" "gamed" "gaming";
@@ -28846,6 +28855,8 @@ lin impute_V2 = mkV2 (mkV "impute" "imputes" "imputed" "imputed" "imputing");
lin imputrescible_A = mkA "imputrescible" ;
lin in_A = mkA "in" ;
lin in_Adv = mkAdv "in";
lin in_1_Adv = mkAdv "in";
lin in_2_Adv = mkAdv "in";
lin in_N = mkN "in" "ins";
lin in_Prep = mkPrep "in";
lin in_accordance_with_Prep = mkPrep "in accordance with";
@@ -32194,6 +32205,8 @@ lin lassitude_N = mkN "lassitude" ;
lin lasso_N = mkN "lasso" "lassos";
lin lasso_V2 = mkV2 (mkV "lasso" "lassos" "lassoed" "lassoed" "lassoing");
lin last_A = irregAdv (mkA "last") "last";
lin last_1_A = irregAdv (mkA "last") "last";
lin last_2_A = irregAdv (mkA "last") "last";
lin last_Adv = mkAdv "last" ;
lin last_N = mkN "last" ;
lin last_V = mkV "last" "lasts" "lasted" "lasted" "lasting";
@@ -34830,6 +34843,8 @@ lin marked_A = mkA "marked" ;
lin markedly_Adv = mkAdv "markedly" ;
lin marker_N = mkN "marker" "markers";
lin market_N = mkN "market" "markets";
lin market_1_N = mkN "market" "markets";
lin market_2_N = mkN "market" "markets";
lin market_V = mkV "market" "markets" "marketed" "marketed" "marketing";
lin market_V2 = mkV2 (mkV "market" "markets" "marketed" "marketed" "marketing");
lin market_cross_N = mkN "market-cross" "market-crosses";
@@ -35108,6 +35123,8 @@ lin matt_A = compoundA (mkA "matt");
lin matt_PN = mkPN "Matt";
lin matte_N = mkN "matte" ;
lin matter_N = mkN "matter" "matters";
lin matter_1_N = mkN "matter" "matters";
lin matter_2_N = mkN "matter" "matters";
lin matter_V = mkV "matter" "matters" "mattered" "mattered" "mattering";
lin matter_of_course_A = compoundA (mkA "matter-of-course");
lin matter_of_fact_A = compoundA (mkA "matter-of-fact");
@@ -49342,6 +49359,8 @@ lin rook_V2 = mkV2 (mkV "rook" "rooks" "rooked" "rooked" "rooking");
lin rookery_N = mkN "rookery" "rookeries";
lin rookie_N = mkN "rookie" "rookies";
lin room_N = mkN "room" "rooms";
lin room_1_N = mkN "room" "rooms";
lin room_2_N = mkN "room" "rooms";
lin room_V = mkV "room" "rooms" "roomed" "roomed" "rooming";
lin room_in_V2 = mkV2 (partV (mkV "room") "in");
lin room_mate_N = mkN "room-mate" "room-mates";
@@ -58335,6 +58354,8 @@ lin tell_V2Q = mkV2Q (IrregEng.tell_V) noPrep;
lin tell_V2S = mkV2S (IrregEng.tell_V) noPrep;
lin tell_V2V = mkV2V (IrregEng.tell_V) noPrep to_Prep;
lin tell_V3 = mkV3 (IrregEng.tell_V) noPrep noPrep;
lin tell_1_V3 = mkV3 (IrregEng.tell_V) noPrep noPrep;
-- Sense 2 of tell_V3 ("tell heaven from hell"): the first object is bare,
-- the second object is introduced by "from" (unlike tell_1_V3, "tell him a story").
lin tell_2_V3 = mkV3 (IrregEng.tell_V) noPrep (mkPrep "from");
lin tell_VS = mkVS (IrregEng.tell_V);
lin tell_VV = mkVV (IrregEng.tell_V);
lin tell_apart_V2 = mkV2 (partV IrregEng.tell_V "apart");
@@ -64353,6 +64374,8 @@ lin wastrel_N = mkN "wastrel" "wastrels";
lin watch_N = mkN "watch" "watches";
lin watch_V = mkV "watch" "watches" "watched" "watched" "watching";
lin watch_V2 = mkV2 (mkV "watch" "watches" "watched" "watched" "watching");
lin watch_1_V2 = mkV2 (mkV "watch" "watches" "watched" "watched" "watching");
lin watch_2_V2 = mkV2 (mkV "watch" "watches" "watched" "watched" "watching");
lin watch_V2V = mkV2V (mkV "watch" "watches" "watched" "watched" "watching") noPrep to_Prep ;
lin watch_VS = mkVS (mkV "watch" "watches" "watched" "watched" "watching");
lin watch_chain_N = mkN "watch-chain" "watch-chains";

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,60 @@
1. Create a check list for Swe
-- Build the check list bncswe.txt: one "lin" rule per function named in bnc-to-check.txt.
do
bnc <- readFile "bnc-to-check.txt" >>= return . words -- whitespace-separated list of BNC funs
dict <- readFile "DictionarySwe.gf" >>= return . map words . lines -- current Swe lexicon, each line split into words
let dictmap = Data.Map.fromList [(f,unwords ws) | "lin":f:"=":ws <- dict] -- fun name -> right-hand side; lines not of the form "lin f = ..." are skipped
let bncdict = [(f,maybe "variants{} ;" id $ Data.Map.lookup f dictmap) | f <- bnc] -- current Swe for BNC; funs missing from the lexicon get the placeholder "variants{} ;"
writeFile "bncswe.txt" $ unlines [unwords ("lin":f:"=":[ws]) | (f,ws) <- bncdict] -- print inspectable file, in the order of bnc-to-check.txt
2. Inspect the check list
Went through the first one thousand entries:
- corrected everything
- removed trailing comments from corrected entries
- split senses where needed
- added a -- | comment to disambiguate each new sense
Moved the checked words to correctswe.txt
3. Apply split senses
grep "\-\- |" correctswe.txt | sort
Copy split senses to bnc-to-check.txt
- *but don't remove the unsplit senses* because they are needed to find words from other languages
Copy split senses to Dictionary.gf, together with the -- | comments
Make copies for split senses in DictionaryEng.gf
Verify the result by compiling DictionaryEng.gf
4. Extend the Swe lexicon
-- Merge the corrected entries from correctswe.txt into the old lexicon and write the result to newswe.txt.
do
old <- readFile "DictionarySwe.gf" >>= return . map words . lines -- read old lexicon, each line split into words
new <- readFile "correctswe.txt" >>= return . map words . lines -- read corrected and new words
let oldmap = Data.Map.fromList [(f,unwords ws) | "lin":f:"=":ws <- old] -- fun name -> right-hand side; lines not of the form "lin f = ..." are skipped
let newlist = [(f,unwords (takeWhile (/= "--") ws)) | "lin":f:"=":ws <- new] -- drop comments from corrected words: everything from the first standalone "--" token on
let newmap = foldr (uncurry Data.Map.insert) oldmap newlist -- insert corrected words; an entry with an existing name replaces the old one
writeFile "newswe.txt" $ unlines [unwords ("lin":f:"=":[ws]) | (f,ws) <- Data.Map.assocs newmap] -- print lin rules, in ascending key order (Data.Map.assocs)
Replace the body of DictionarySwe.gf by newswe.txt
Compile DictionarySwe.gf
5. Save the rest for later
Remove the corrected words from bncswe.txt,
or take note of the last word that has already been checked.

View File

@@ -90,6 +90,8 @@ more_Adv
about_Prep
now_Adv
last_A
last_1_A
last_2_A
other_A
give_V3
give_V2
@@ -140,6 +142,8 @@ thing_N
tell_VV
tell_VS
tell_V3
tell_1_V3
tell_2_V3
tell_V2V
tell_V2S
tell_V2Q
@@ -183,7 +187,6 @@ leave_V2
leave_V
life_N
great_A
where_Adv
case_N
woman_N
over_Adv
@@ -263,6 +266,8 @@ about_Adv
something_NP
school_N
in_Adv
in_1_Adv
in_2_Adv
small_A
place_N
before_Prep
@@ -406,6 +411,8 @@ hear_V2V
hear_V2
hear_V
room_N
room_1_N
room_2_N
whether_Subj
water_N
form_N
@@ -498,6 +505,8 @@ sit_VA
sit_V2
sit_V
market_N
market_1_N
market_2_N
appear_VV
appear_VS
appear_VA
@@ -614,6 +623,8 @@ low_A
cost_N
little_Det
matter_N
matter_1_N
matter_2_N
community_N
remain_VV
remain_VS
@@ -685,6 +696,8 @@ spend_V2
spend_V
force_N
condition_N
condition_1_N
condition_2_N
paper_N
off_Prep
major_A
@@ -756,6 +769,8 @@ management_N
morning_N
draw_V2V
draw_V2
draw_1_V2
draw_2_V2
draw_V
hope_VV
hope_VS
@@ -782,6 +797,7 @@ foot_N
clear_A
boy_N
game_N
game_3_N
game_2_N
game_1_N
food_N
@@ -826,6 +842,8 @@ cause_VS
cause_V2V
cause_V2
arm_N
arm_1_N
arm_2_N
history_N
parent_N
land_N
@@ -833,6 +851,8 @@ trade_N
watch_VS
watch_V2V
watch_V2
watch_1_V2
watch_2_V2
watch_V
white_A
situation_N
@@ -882,6 +902,8 @@ cover_V2
apply_VV
apply_V2V
apply_V2
apply_1_V2
apply_2_V2
apply_V
project_N
raise_V2V

File diff suppressed because it is too large Load Diff