300 full-penn top words in Dict

This commit is contained in:
aarne
2013-04-03 20:27:34 +00:00
parent d10c6bc85f
commit 92f3662b3f
3 changed files with 269 additions and 6 deletions

View File

@@ -59099,7 +59099,7 @@ austria_PN = mkPN (mkN "Itä" (mkN "valta")) ; --MAN
bare_A = mkA "paljas" ; --MAN bare_A = mkA "paljas" ; --MAN
bear_N = mkN "karhu" ; --MAN bear_N = mkN "karhu" ; --MAN
because_of_Prep = mkPrep "vuoksi" ; --MAN because_of_Prep = mkPrep "vuoksi" ; --MAN
begin_VV = mkVV "alkaa" ; --MAN begin_VV = mkVV (mkV "alkaa" "alkoi") ; --MAN
below_Prep = mkPrep "alapuolella" ; --MAN below_Prep = mkPrep "alapuolella" ; --MAN
beneath_Prep = mkPrep "alapuolella" ; --MAN beneath_Prep = mkPrep "alapuolella" ; --MAN
benefit_V = mkV "hyötyä" ; --MAN benefit_V = mkV "hyötyä" ; --MAN
@@ -59276,8 +59276,8 @@ soon_Adv = mkAdv "pian" ; --MAN
spain_PN = mkPN "Espanja" ; --MAN spain_PN = mkPN "Espanja" ; --MAN
stabilize_V = mkV "vakiinnuttaa" ; --MAN stabilize_V = mkV "vakiinnuttaa" ; --MAN
start_V = mkV "aloittaa" ; --MAN start_V = mkV "aloittaa" ; --MAN
start_ing_VV = mkVV "alkaa" ; --MAN start_ing_VV = mkVV (mkV "alkaa" "alkoi") ; --MAN
start_to_VV = mkVV "alkaa" ; --MAN start_to_VV = mkVV (mkV "alkaa" "alkoi") ; --MAN
stockholm_PN = mkPN "Tukholma" ; --MAN stockholm_PN = mkPN "Tukholma" ; --MAN
strike_N = mkN "lakko" ; --MAN strike_N = mkN "lakko" ; --MAN
such_Predet = mkPredet "sellainen" ; --MAN such_Predet = mkPredet "sellainen" ; --MAN
@@ -59429,7 +59429,7 @@ cause_V2V = mkWV2V (mkV "saada") ; --MANCV2
choose_V2 = mkV2 "valita" ; --MANCV2 choose_V2 = mkV2 "valita" ; --MANCV2
come_V2 = mkWV2 (k67 "tulla") illative ; --MANCV2 come_V2 = mkWV2 (k67 "tulla") illative ; --MANCV2
defend_V2 = mkV2 "puolustaa" partitive ; --MANCV2 defend_V2 = mkV2 "puolustaa" partitive ; --MANCV2
describe_V2 = mkV2 "kuvata" partitive ; --MANCV2 describe_V2 = mkV2 (mkV "kuvata" "kuvasi") partitive ; --MANCV2
design_V2 = mkV2 "muotoilla" ; --MANCV2 design_V2 = mkV2 "muotoilla" ; --MANCV2
develop_V2 = mkV2 "kehittää" ; --MANCV2 develop_V2 = mkV2 "kehittää" ; --MANCV2
dismiss_V2 = mkV2 (mkV "hylätä" "hylkäsi") ; --MANCV2 dismiss_V2 = mkV2 (mkV "hylätä" "hylkäsi") ; --MANCV2
@@ -60063,9 +60063,262 @@ yield_N = mkN "tuotos" ;
weekend_N = mkN "viikonloppu" ; -- 946 weekend_N = mkN "viikonloppu" ; -- 946
withdrawal_N = mkN "pois" (mkN "vetäminen") ; withdrawal_N = mkN "pois" (mkN "vetäminen") ;
-- Entries added manually from the full-Penn word list, going down to the ">3" cutoff
-- (presumably a corpus-frequency threshold — TODO confirm).
-- Each line binds one abstract lexical constant to a Finnish form built with a paradigm
-- function (mkN, mkA, mkV, mkV2, mkPN, mkPrep, mkAdv, ...).
inasmuch_as_Adv = mkAdv "siinä määrin kuin" ;
up_Prep = mkPrep "ylös" partitive ;
whether_Prep = mkPrep "onko" nominative ;
out_of_Prep = mkPrep "ulkopuolella" ;
down_Prep = mkPrep "alas" partitive ;
though_Subj = mkSubj "vaikka" ;
off_Prep = mkPrep "ulkopuolella" ; -- NOTE(review): same form as out_of_Prep; confirm this is intended
plunge_N = mkN "lasku" ;
san_francisco_PN = mkPN "San Francisco" ;
thus_Adv = mkAdv "siten" ;
ever_AdV = mkAdV "koskaan" ;
out_Prep = mkPrep "ulos" elative ;
see_V = mkV "nähdä" ;
price_V2 = mkV2 "hinnoitella" ;
next_Prep = mkPrep elative "seuraava" ; -- NOTE(review): Case-first argument order, unlike the Str-first calls above; presumably a different mkPrep overload — confirm
mind_N = mkN "mieli" ;
as_of_Prep = mkPrep "koskien" partitive ;
achieve_V2 = mkV2 "saavuttaa" ;
duty_N = mkN "velvollisuus" ;
wake_N = mkN "herääminen" ;
ahead_of_Prep = mkPrep "edellä" ;
insure_V2 = mkV2 "vakuuttaa" ;
once_Prep = mkPrep "kerran" nominative ;
white_N = mkN "valkoinen" ;
confident_A = mkA "luottavainen" ;
houston_PN = mkPN "Houston" ;
brain_N = mkN "aivo" ; -- NOTE(review): normally used in the plural ("aivot"); confirm singular stem is what callers expect
approximate_A = mkA "likimääräinen" ;
germany_PN = mkPN "Saksa" ;
though_Prep = mkPrep "vaikka" nominative ;
withdraw_V = mkV "luopua" ;
-- "osallistua" takes its complement in the illative (osallistua kokoukseen), not the elative.
attend_V2 = mkV2 "osallistua" illative ;
draw_V = mkV "vetää" ;
on_behalf_of_Prep = mkPrep "puolesta" ;
ringer_N = mkN "soittaja" ; -- NOTE(review): "soittaja" = one who rings/plays; confirm this is the intended sense of "ringer"
therefore_Adv = mkAdv "siksi" ;
entity_N = mkN "olio" ;
manhattan_PN = mkPN "Manhattan" ;
display_V2 = mkV2 "näyttää" ;
-- "mitä tulee" governs the illative (mitä tulee hintoihin), not the elative.
as_for_Prep = mkPrep "mitä tulee" illative ;
-- The established Finnish name of the city is "Soul" (the English spelling "Seoul" is not used).
seoul_PN = mkPN "Soul" ;
bushel_N = mkN "busheli" ;
where_Subj = mkSubj "missä" ;
on_top_of_Prep = mkPrep "päälle" ;
lebanon_PN = mkPN (mkN "Libanon" "Libanoneja") ;
michigan_PN = mkPN (mkN "Michigan" "Michiganeja") ;
ensure_V2 = mkV2 "varmistaa" ;
dual_A = mkA "duaalinen" ;
around_AdN = mkAdN "suunnilleen" ;
computerize_V2 = mkV2 "tietokoneistaa" ;
stabilize_V2 = mkV2 "vakiinnuttaa" ;
illinois_PN = mkPN (mkN "Illinois" "Illinoiseja") ;
brussels_PN = mkPN (mkN "Bryssel" "Brysseliä") ;
benefit_V2 = mkV2 "hyödyttää" partitive ;
auction_V2 = mkV2 "huutokaupata" ;
assure_V2 = mkV2 "varmistaa" ; -- NOTE(review): same verb as ensure_V2; confirm "assure" should not be e.g. "vakuuttaa"
high_grade_A = mkA "korkea-asteinen" ;
allegedly_AdV = mkAdV "väitetysti" ;
alaska_PN = mkPN "Alaska" ;
tip_N = mkN "kärki" ;
package_V2 = mkV2 "pakata" ;
marry_V2 = mkV2 "naida" ;
magnetic_A = mkA "magneettinen" ;
constitute_V2 = mkV2 "muodostaa" ;
massachusetts_PN = mkPN "Massachusetts" ;
initiate_V2 = mkV2 "aloittaa" ;
cuba_PN = mkPN "Kuuba" ;
past_Prep = mkPrep "ohi" ;
manila_PN = mkPN "Manila" ;
apiece_Adv = mkAdv "kappale" ; -- NOTE(review): "kappale" is a bare noun; confirm whether an adverbial form (e.g. "kappaleelta") was intended
saudi_arabia_PN = mkPN "Saudi-Arabia" ;
reversal_N = mkN "kääntö" ;
paint_V2 = mkV2 "maalata" ;
minnesota_PN = mkPN "Minnesota" ;
liquid_A = mkA "nestemäinen" ;
killing_N = mkN "tappaminen" ;
hawaii_PN = mkPN "Havaiji" ;
escape_V2 = mkV2 "paeta" partitive ;
thanks_to_Prep = mkPrep "ansiosta" ;
tax_free_A = mkA "verovapaa" ;
reinvest_V2 = mkV2 (mkV (mkV "investoida") "uudelleen") elative ; -- particle verb: investoida + "uudelleen"
korea_PN = mkPN "Korea" ;
consequent_A = mkA "johdonmukainen" ;
blame_V = mkV "syyttää" ;
vs_Prep = mkPrep "vs" nominative ;
outperform_V2 = mkV2 "voittaa" ;
equip_V2 = mkV2 "varustaa" ;
earmark_V2 = mkV2 (mkV "korva" (mkV "merkitä")) ; -- prefixed verb: uses the mkV : Str -> V -> V overload
catastrophic_A = mkA "katastrofaalinen" ;
long_distance_A = mkA "pitkämatkalainen" ;
insure_V = mkV "vakuuttaa" ;
hence_Adv = mkAdv "sen vuoksi" ;
full_time_A = mkA "täyspäiväinen" ;
undoubted_A = mkA "epäilemätön" ;
stamford_PN = mkPN "Stamford" ;
escape_V = mkV (mkV "lähteä") "pakoon" ;
cambodia_PN = mkPN "Kambodza" ; -- NOTE(review): standard spelling is "Kambodža"; confirm whether the plain "z" is deliberate
attend_V = mkV "osallistua" ;
worth_Prep = mkPrep "arvoinen" ;
visual_A = mkA "visuaalinen" ;
upward_A = mkA "ylöspäinpyrkivä" ;
stunning_A = mkA "mykistävä" ;
since_Adv = mkAdv "siitä lähtien" ;
shell_N = mkN "kuori" ;
reserve_V2 = mkV2 "varata" ;
pakistan_PN = mkPN "Pakistan" ;
missouri_PN = mkPN "Missouri" ;
free_of_A2 = mkA2 (mkA "vapaa") (mkPrep elative) ;
exempt_A = mkA "vapautettu" ;
carry_N = mkN "kantaa" ; -- NOTE(review): "kantaa" is a verb infinitive passed to mkN; confirm the intended noun (e.g. "kantama")
belgium_PN = mkPN "Belgia" ;
aside_from_Prep = mkPrep "syrjässä" elative ;
argentina_PN = mkPN "Argentiina" ;
thailand_PN = mkPN "Thaimaa" ;
snack_N = mkN "välipala" ;
reinstate_V2 = mkV2 (mkV (mkV "asettaa") "uudelleen") ;
peru_PN = mkPN "Peru" ;
panic_V = mkV (mkV "joutua") "paniikkiin" ;
lock_N = mkN "lukko" ;
large_scale_A = mkA "laajamittainen" ;
kentucky_PN = mkPN "Kentucky" ;
delaware_PN = mkPN "Delaware" ;
continental_A = mkA "mannermainen" ;
casual_A = mkA "epäformaali" ;
artistic_A = mkA "taiteellinen" ;
arkansas_PN = mkPN "Arkansas" ;
alert_V2 = mkV2 "varoittaa" partitive ;
alarm_V2 = mkV2 "hälyttää" ;
aboard_Prep = mkPrep "kannella" ;
wedding_N = mkN "vihkiminen" ;
trace_V2 = mkV2 "jäljittää" ;
toilet_N = mkN "käymälä" ;
tide_N = mkN "vuoro" L.water_N ; -- compound: "vuoro" + the noun referenced as L.water_N
tennessee_PN = mkPN "Tennessee" ;
spare_A = mkA "ylijäänyt" ;
saudi_arabian_A = mkA "saudi-arabialainen" ;
reorganize_V2 = mkV2 "uudelleenorganisoida" ;
prior_to_Prep = mkPrep partitive "edeltävä" ;
premise_N = mkN "tila" ;
oversubscribe_V2 = mkV2 "ylibuukata" ;
mandate_N = mkN "mandaatti" ;
in_spite_of_Prep = mkPrep elative "huolimatta" ;
feat_N = mkN "uroteko" ;
debris_1_N = mkN "ylijäämä" ;
crush_N = mkN "murska" ;
conditional_A = mkA "ehdollinen" ;
bleed_V = mkV (mkV "vuotaa") "verta" ;
back_to_Prep = mkPrep "takaisin" elative ; -- NOTE(review): "takaisin" + goal normally takes the illative ("takaisin kotiin"); confirm elative is intended
appreciate_V = mkV "arvostaa" ;
allegedly_AdA = mkAdA "muka" ;
surpass_V2 = mkV2 "ylittää" ;
since_Subj = mkSubj "siitä lähtien kun" ;
short_lived_A = mkA "lyhytikäinen" ;
-- "putkisto" = pipework/plumbing; the earlier "notkahdus" means a dip or slump, which is not a sense of "plumbing".
plumbing_N = mkN "putkisto" ;
pacific_PN = mkPN (mkN "Tyyni" L.sea_N) ; -- compound: "Tyyni" + the noun referenced as L.sea_N
organ_N = mkN "elin" ;
occupy_V2 = mkV2 "vallata" ;
notwithstanding_Prep = mkPrep elative "huolimatta" ;
misrepresent_V2 = mkV2 "vääristellä" partitive ;
indiana_PN = mkPN "Indiana" ;
competent_A = mkA "kompetentti" ;
chile_PN = mkPN "Chile" ;
blue_collar_A = mkA "työläishenkinen" ;
bloody_A = mkA "verinen" ;
bloated_A = mkA "paisuteltu" ;
athletic_A = mkA "atleettinen" ;
as_opposed_to_Prep = mkPrep "toisin kuin" nominative ;
appreciate_V2 = mkV2 "arvostaa" partitive ;
applause_N = mkN "suosionosoitus" ;
align_V2 = mkV2 (mkV (mkV "asettaa") "rinnakkain") ;
with_respect_to_Prep = mkPrep "suhteessa" illative ;
watt_N = mkN "watti" ;
till_Prep = mkPrep illative "asti" ;
stamp_N = mkN "leima" ;
spectator_N = mkN "katsoja" ;
slip_N = mkN "erehdys" ;
self_employed_A = mkA "itsensätyöllistävä" ;
russia_PN = mkPN "Venäjä" ;
remove_V = mkV "poistaa" ;
portugal_PN = mkPN "Portugali" ;
patience_N = mkN "kärsivällisyys" ;
part_time_A = mkA "osa-aikainen" ;
paint_V = mkV "maalata" ;
orange_1_N = mkN "appelsiini" ;
naked_A = mkA "alaston" ;
make_N = mkN "teko" ;
jail_V2 = mkV2 "vangita" ;
imprison_V2 = mkV2 "vangita" ;
hemorrhage_V = mkV (mkV "vuotaa") "verta" ; ---- same definition as bleed_V; the original "----" marker presumably flags it as provisional
durable_N = mkN "kestokulutus" (mkN "hyödyke") ;
burn_V2 = mkV2 "polttaa" ;
atop_Prep = mkPrep "huipulla" ;
alongside_Prep = mkPrep "rinnalla" ;
unpredictable_A = mkA "ennustamaton" ;
preferential_A = mkA "ensisijainen" ;
occupy_V = mkV "vallata" ;
montana_PN = mkPN "Montana" ;
long_range_A = mkA "pitkävaikutteinen" ;
less_than_AdN = mkAdN "alle" ;
irreparable_A = mkA "korjaamaton" ;
high_level_A = mkA "korkeatasoinen" ;
hear_of_V2 = mkV2 "kuulla" elative ;
essay_N = mkN "essee" ;
comment_VS = mkVS "kommentoida" ;
bulgaria_PN = mkPN "Bulgaria" ;
built_in_A = mkA "sisäänrakennettu" ;
beirut_PN = mkPN "Beirut" ;
beat_N = mkN "isku" ;
bankrupt_A = mkA (mkN "konkurssi" (mkN "kypsä")) ;
bangkok_PN = mkPN "Bangkok" ;
alumnus_N = mkN "alumni" ;
alone_A = mkA "yksi" ; -- NOTE(review): "yksi" means 'one'; confirm whether "yksinäinen" or similar was intended
wreckage_N = mkN "romuttuminen" ;
well_intentioned_A = mkA "hyväätarkoittava" ; -- NOTE(review): usually written as two words ("hyvää tarkoittava"); confirm the fused spelling
weekly_Adv = mkAdv "viikoittain" ;
syria_PN = mkPN "Syyria" ;
st_petersburg_PN = mkPN "Pietari" ;
riot_N = mkN "mellakka" ;
reserve_V = mkV (mkV "esittää") "varaus" ; -- NOTE(review): the particle "varaus" is in the nominative; confirm the object form (e.g. "varauksen")
repossess_V2 = mkV2 (mkV (mkV "omistaa") "uudelleen") ;
re_examine_V2 = mkV2 (mkV (mkV "tarkastaa") "uudelleen") ;
re_evaluate_V2 = mkV2 (mkV (mkV "arvioida") "uudelleen") ;
primitive_A = mkA "primitiivinen" ;
pass_N = mkN "passi" ;
noble_A = mkA "jalo" ;
nigeria_PN = mkPN "Nigeria" ;
munich_PN = mkPN "München" ;
middle_aged_A = mkA "keski-ikäinen" ;
mate_1_N = mkN "kumppani" ;
marry_V = mkV L.go_V "naimisiin" ; -- particle verb built on the verb referenced as L.go_V
lack_V = mkV "puuttua" ;
in_place_of_Prep = mkPrep "sijasta" ;
in_lieu_of_Prep = mkPrep "sijasta" ;
high_speed_A = mkA "nopea" ;
high_powered_A = mkA "voimakas" ;
governmental_A = mkA "hallituksellekuuluva" ;
full_fledged_A = mkA "täysimääräinen" ;
fond_A = mkA "kiintynyt" ;
dropout_N = mkN "keskeyttänyt" ;
denmark_PN = mkPN "Tanska" ;
customize_V2 = mkV2 "räätälöidä" ;
cure_1_N = mkN "hoito" ;
co_found_V2 = mkV2 (mkV olla_V "mukana perustamassa") partitive ; -- NOTE(review): olla_V is unqualified while other borrowed entries use the L. prefix; confirm it is in scope
champion_V2 = mkV2 "hallita" ; -- NOTE(review): "hallita" = to rule/master; confirm this matches the intended sense of "champion"
burn_N = mkN "palo" ;
bra_N = mkN "rinta" (mkN "liivi") ;
bolt_N = mkN "pultti" ;
beside_Prep = mkPrep "vieressä" ;
aesthetic_A = mkA "esteettinen" ;
-- miscellaneous additions -- miscellaneous additions
how8much_IDet = {s = \\c => "kuinka" ++ (snoun2nounBind (exceptNomN (mkN "paljo") "paljon")).s ! NCase Sg c ; n = Sg ; isNum = False} ; how8much_IDet = {s = \\c => "kuinka" ++ (snoun2nounBind (exceptNomN (mkN "paljo") "paljon")).s ! NCase Sg c ; n = Sg ; isNum = False} ;
-- Two-place verb: the verb "riippua" with the adposition "yläpuolella" marking its complement.
hang_over_V2 = mkV2 (mkV "riippua") (mkPrep "yläpuolella") ;
} }

View File

@@ -182,7 +182,7 @@ oper
mkV : (huutaa,dan,taa,tavat,takaa,detaan,sin,si,sisi,tanut,dettu,tanee : Str) -> V ; -- worst-case verb mkV : (huutaa,dan,taa,tavat,takaa,detaan,sin,si,sisi,tanut,dettu,tanee : Str) -> V ; -- worst-case verb
mkV : VK -> V ; -- verb from DictFin (Kotus) mkV : VK -> V ; -- verb from DictFin (Kotus)
mkV : V -> Str -> V ; -- hakata päälle (particle verb) mkV : V -> Str -> V ; -- hakata päälle (particle verb)
--- mkV : Str -> V -> V ; -- laimin+lyödä (prefixed verb) mkV : Str -> V -> V ; -- laimin+lyödä (prefixed verb)
} ; } ;
-- All the patterns above have $nominative$ as subject case. -- All the patterns above have $nominative$ as subject case.
@@ -604,6 +604,7 @@ mkVS = overload {
huusin,huusi,huusisi,huutanut,huudettu,huutanee : Str) -> V = mk12V ; huusin,huusi,huusisi,huutanut,huudettu,huutanee : Str) -> V = mk12V ;
mkV : (sana : VK) -> V = \w -> vforms2sverb w.s ** {sc = NPCase Nom ; lock_V = <> ; p = []} ; mkV : (sana : VK) -> V = \w -> vforms2sverb w.s ** {sc = NPCase Nom ; lock_V = <> ; p = []} ;
mkV : V -> Str -> V = \w,p -> {s = w.s ; sc = w.sc ; lock_V = <> ; h = w.h ; p = p} ; mkV : V -> Str -> V = \w,p -> {s = w.s ; sc = w.sc ; lock_V = <> ; h = w.h ; p = p} ;
-- Prefixed verb (laimin+lyödä): glues the prefix string onto the front of every form of the
-- base verb; subject case, the h field and the particle are copied from the base verb.
mkV : Str -> V -> V = \prefix,base -> {s = \\form => prefix + base.s ! form ; sc = base.sc ; lock_V = <> ; h = base.h ; p = base.p} ;
} ; } ;
mk1V : Str -> V = \s -> mk1V : Str -> V = \s ->

View File

@@ -196,5 +196,14 @@ Implemented an elementary chunking translator, located in svn://molto-project.eu
For the first time, able to "translate everything" from English to Finnish. The quality is horrible of course. For the first time, able to "translate everything" from English to Finnish. The quality is horrible of course.
3/4
Worked with analysis tools, completed most of the first 300 full-Penn
words (>3) still missing in Dict. Changes in 250 sentences in
wsj-3220.
Rough estimate: DictEngFin contains about 60k words, of which 57.5k come from
WN and 2.5k were added manually (based on grep mkW DictEngFin.gf).