1
0
forked from GitHub/gf-core

300 full-penn top words in Dict

This commit is contained in:
aarne
2013-04-03 20:27:34 +00:00
parent d10c6bc85f
commit 92f3662b3f
3 changed files with 269 additions and 6 deletions

View File

@@ -59099,7 +59099,7 @@ austria_PN = mkPN (mkN "Itä" (mkN "valta")) ; --MAN
-- Lexicon entries tagged --MAN (presumably "manually added" — TODO confirm).
-- begin_VV is listed twice because this is a rendered diff hunk: the
-- string-only mkVV form and the form that passes an explicit verb built
-- with mkV "alkaa" "alkoi" (presumably old line, then its replacement).
bare_A = mkA "paljas" ; --MAN
bear_N = mkN "karhu" ; --MAN
because_of_Prep = mkPrep "vuoksi" ; --MAN
begin_VV = mkVV "alkaa" ; --MAN
begin_VV = mkVV (mkV "alkaa" "alkoi") ; --MAN
below_Prep = mkPrep "alapuolella" ; --MAN
beneath_Prep = mkPrep "alapuolella" ; --MAN
benefit_V = mkV "hyötyä" ; --MAN
@@ -59276,8 +59276,8 @@ soon_Adv = mkAdv "pian" ; --MAN
-- Lexicon entries tagged --MAN (presumably "manually added" — TODO confirm).
-- start_ing_VV and start_to_VV each appear twice because this is a rendered
-- diff hunk: first the string-only mkVV "alkaa" form, then the form that
-- passes an explicit verb built with mkV "alkaa" "alkoi".
spain_PN = mkPN "Espanja" ; --MAN
stabilize_V = mkV "vakiinnuttaa" ; --MAN
start_V = mkV "aloittaa" ; --MAN
start_ing_VV = mkVV "alkaa" ; --MAN
start_to_VV = mkVV "alkaa" ; --MAN
start_ing_VV = mkVV (mkV "alkaa" "alkoi") ; --MAN
start_to_VV = mkVV (mkV "alkaa" "alkoi") ; --MAN
stockholm_PN = mkPN "Tukholma" ; --MAN
strike_N = mkN "lakko" ; --MAN
such_Predet = mkPredet "sellainen" ; --MAN
@@ -59429,7 +59429,7 @@ cause_V2V = mkWV2V (mkV "saada") ; --MANCV2
-- Two-place verb entries tagged --MANCV2. Several pass a case (partitive,
-- illative) as the last argument to mkV2 / mkWV2.
-- describe_V2 appears twice because this is a rendered diff hunk: the
-- string-only form and the form with an explicit mkV "kuvata" "kuvasi".
choose_V2 = mkV2 "valita" ; --MANCV2
come_V2 = mkWV2 (k67 "tulla") illative ; --MANCV2
defend_V2 = mkV2 "puolustaa" partitive ; --MANCV2
describe_V2 = mkV2 "kuvata" partitive ; --MANCV2
describe_V2 = mkV2 (mkV "kuvata" "kuvasi") partitive ; --MANCV2
design_V2 = mkV2 "muotoilla" ; --MANCV2
develop_V2 = mkV2 "kehittää" ; --MANCV2
dismiss_V2 = mkV2 (mkV "hylätä" "hylkäsi") ; --MANCV2
@@ -60063,9 +60063,262 @@ yield_N = mkN "tuotos" ;
weekend_N = mkN "viikonloppu" ; -- 946
withdrawal_N = mkN "pois" (mkN "vetäminen") ;
-- Words from the full Penn data that were still missing in Dict, down to >3
-- (presumably an occurrence-count threshold — TODO confirm).
inasmuch_as_Adv = mkAdv "siinä määrin kuin" ;
up_Prep = mkPrep "ylös" partitive ;
whether_Prep = mkPrep "onko" nominative ; -- NOTE(review): "onko" is a question form of the verb "olla"; confirm it is usable as a Prep
out_of_Prep = mkPrep "ulkopuolella" ;
down_Prep = mkPrep "alas" partitive ;
though_Subj = mkSubj "vaikka" ;
off_Prep = mkPrep "ulkopuolella" ;
plunge_N = mkN "lasku" ;
san_francisco_PN = mkPN "San Francisco" ;
thus_Adv = mkAdv "siten" ;
ever_AdV = mkAdV "koskaan" ;
out_Prep = mkPrep "ulos" elative ;
see_V = mkV "nähdä" ;
price_V2 = mkV2 "hinnoitella" ;
next_Prep = mkPrep elative "seuraava" ; -- NOTE(review): case comes before the string here, unlike out_Prep above; presumably a different mkPrep overload — confirm intended
mind_N = mkN "mieli" ;
as_of_Prep = mkPrep "koskien" partitive ;
achieve_V2 = mkV2 "saavuttaa" ;
duty_N = mkN "velvollisuus" ;
wake_N = mkN "herääminen" ;
ahead_of_Prep = mkPrep "edellä" ;
insure_V2 = mkV2 "vakuuttaa" ;
once_Prep = mkPrep "kerran" nominative ;
white_N = mkN "valkoinen" ;
confident_A = mkA "luottavainen" ;
houston_PN = mkPN "Houston" ;
brain_N = mkN "aivo" ;
approximate_A = mkA "likimääräinen" ;
germany_PN = mkPN "Saksa" ;
though_Prep = mkPrep "vaikka" nominative ;
withdraw_V = mkV "luopua" ;
attend_V2 = mkV2 "osallistua" elative ;
draw_V = mkV "vetää" ;
on_behalf_of_Prep = mkPrep "puolesta" ;
ringer_N = mkN "soittaja" ;
therefore_Adv = mkAdv "siksi" ;
entity_N = mkN "olio" ;
manhattan_PN = mkPN "Manhattan" ;
display_V2 = mkV2 "näyttää" ;
as_for_Prep = mkPrep "mitä tulee" elative ;
seoul_PN = mkPN "Seoul" ;
bushel_N = mkN "busheli" ;
where_Subj = mkSubj "missä" ;
on_top_of_Prep = mkPrep "päälle" ;
lebanon_PN = mkPN (mkN "Libanon" "Libanoneja") ;
michigan_PN = mkPN (mkN "Michigan" "Michiganeja") ;
ensure_V2 = mkV2 "varmistaa" ;
dual_A = mkA "duaalinen" ;
around_AdN = mkAdN "suunnilleen" ;
computerize_V2 = mkV2 "tietokoneistaa" ;
stabilize_V2 = mkV2 "vakiinnuttaa" ;
-- Full-Penn additions, continued: proper names, adjectives, verbs and
-- prepositions, one lexicon entry per line.
illinois_PN = mkPN (mkN "Illinois" "Illinoiseja") ;
brussels_PN = mkPN (mkN "Bryssel" "Brysseliä") ;
benefit_V2 = mkV2 "hyödyttää" partitive ;
auction_V2 = mkV2 "huutokaupata" ;
assure_V2 = mkV2 "varmistaa" ;
high_grade_A = mkA "korkea-asteinen" ;
allegedly_AdV = mkAdV "väitetysti" ;
alaska_PN = mkPN "Alaska" ;
tip_N = mkN "kärki" ;
package_V2 = mkV2 "pakata" ;
marry_V2 = mkV2 "naida" ;
magnetic_A = mkA "magneettinen" ;
constitute_V2 = mkV2 "muodostaa" ;
massachusetts_PN = mkPN "Massachusetts" ;
initiate_V2 = mkV2 "aloittaa" ;
cuba_PN = mkPN "Kuuba" ;
past_Prep = mkPrep "ohi" ;
manila_PN = mkPN "Manila" ;
apiece_Adv = mkAdv "kappale" ;
saudi_arabia_PN = mkPN "Saudi-Arabia" ;
reversal_N = mkN "kääntö" ;
paint_V2 = mkV2 "maalata" ;
minnesota_PN = mkPN "Minnesota" ;
liquid_A = mkA "nestemäinen" ;
killing_N = mkN "tappaminen" ;
hawaii_PN = mkPN "Havaiji" ;
escape_V2 = mkV2 "paeta" partitive ;
thanks_to_Prep = mkPrep "ansiosta" ;
tax_free_A = mkA "verovapaa" ;
reinvest_V2 = mkV2 (mkV (mkV "investoida") "uudelleen") elative ;
korea_PN = mkPN "Korea" ;
consequent_A = mkA "johdonmukainen" ;
blame_V = mkV "syyttää" ;
vs_Prep = mkPrep "vs" nominative ;
outperform_V2 = mkV2 "voittaa" ;
equip_V2 = mkV2 "varustaa" ;
earmark_V2 = mkV2 (mkV "korva" (mkV "merkitä")) ; -- string first, then verb: uses the mkV : Str -> V -> V (prefixed verb) form
catastrophic_A = mkA "katastrofaalinen" ;
long_distance_A = mkA "pitkämatkalainen" ;
insure_V = mkV "vakuuttaa" ;
hence_Adv = mkAdv "sen vuoksi" ;
full_time_A = mkA "täyspäiväinen" ;
undoubted_A = mkA "epäilemätön" ;
stamford_PN = mkPN "Stamford" ;
escape_V = mkV (mkV "lähteä") "pakoon" ;
cambodia_PN = mkPN "Kambodza" ;
attend_V = mkV "osallistua" ;
worth_Prep = mkPrep "arvoinen" ;
visual_A = mkA "visuaalinen" ;
upward_A = mkA "ylöspäinpyrkivä" ;
stunning_A = mkA "mykistävä" ;
since_Adv = mkAdv "siitä lähtien" ;
shell_N = mkN "kuori" ;
reserve_V2 = mkV2 "varata" ;
pakistan_PN = mkPN "Pakistan" ;
missouri_PN = mkPN "Missouri" ;
free_of_A2 = mkA2 (mkA "vapaa") (mkPrep elative) ;
exempt_A = mkA "vapautettu" ;
carry_N = mkN "kantaa" ; -- NOTE(review): "kantaa" is the verb infinitive "to carry" but is passed to mkN — confirm the intended noun
belgium_PN = mkPN "Belgia" ;
aside_from_Prep = mkPrep "syrjässä" elative ;
-- Full-Penn additions, continued. Entries written as mkV (mkV "...") "..."
-- pair a verb with a trailing string (the V -> Str -> V form of mkV).
argentina_PN = mkPN "Argentiina" ;
thailand_PN = mkPN "Thaimaa" ;
snack_N = mkN "välipala" ;
reinstate_V2 = mkV2 (mkV (mkV "asettaa") "uudelleen") ;
peru_PN = mkPN "Peru" ;
panic_V = mkV (mkV "joutua") "paniikkiin" ;
lock_N = mkN "lukko" ;
large_scale_A = mkA "laajamittainen" ;
kentucky_PN = mkPN "Kentucky" ;
delaware_PN = mkPN "Delaware" ;
continental_A = mkA "mannermainen" ;
casual_A = mkA "epäformaali" ;
artistic_A = mkA "taiteellinen" ;
arkansas_PN = mkPN "Arkansas" ;
alert_V2 = mkV2 "varoittaa" partitive ;
alarm_V2 = mkV2 "hälyttää" ;
aboard_Prep = mkPrep "kannella" ;
wedding_N = mkN "vihkiminen" ;
trace_V2 = mkV2 "jäljittää" ;
toilet_N = mkN "käymälä" ;
tide_N = mkN "vuoro" L.water_N ;
tennessee_PN = mkPN "Tennessee" ;
spare_A = mkA "ylijäänyt" ;
saudi_arabian_A = mkA "saudi-arabialainen" ;
reorganize_V2 = mkV2 "uudelleenorganisoida" ;
prior_to_Prep = mkPrep partitive "edeltävä" ;
premise_N = mkN "tila" ;
oversubscribe_V2 = mkV2 "ylibuukata" ;
mandate_N = mkN "mandaatti" ;
in_spite_of_Prep = mkPrep elative "huolimatta" ;
feat_N = mkN "uroteko" ;
debris_1_N = mkN "ylijäämä" ;
crush_N = mkN "murska" ;
conditional_A = mkA "ehdollinen" ;
bleed_V = mkV (mkV "vuotaa") "verta" ;
back_to_Prep = mkPrep "takaisin" elative ;
appreciate_V = mkV "arvostaa" ;
allegedly_AdA = mkAdA "muka" ;
surpass_V2 = mkV2 "ylittää" ;
since_Subj = mkSubj "siitä lähtien kun" ;
short_lived_A = mkA "lyhytikäinen" ;
plumbing_N = mkN "notkahdus" ; -- NOTE(review): "notkahdus" means a dip/slump; for pipework one would expect e.g. "putkisto" — confirm the intended sense
pacific_PN = mkPN (mkN "Tyyni" L.sea_N) ;
organ_N = mkN "elin" ;
occupy_V2 = mkV2 "vallata" ;
notwithstanding_Prep = mkPrep elative "huolimatta" ;
misrepresent_V2 = mkV2 "vääristellä" partitive ;
indiana_PN = mkPN "Indiana" ;
competent_A = mkA "kompetentti" ;
chile_PN = mkPN "Chile" ;
blue_collar_A = mkA "työläishenkinen" ;
bloody_A = mkA "verinen" ;
bloated_A = mkA "paisuteltu" ;
athletic_A = mkA "atleettinen" ;
as_opposed_to_Prep = mkPrep "toisin kuin" nominative ;
appreciate_V2 = mkV2 "arvostaa" partitive ;
applause_N = mkN "suosionosoitus" ;
align_V2 = mkV2 (mkV (mkV "asettaa") "rinnakkain") ;
with_respect_to_Prep = mkPrep "suhteessa" illative ;
watt_N = mkN "watti" ;
till_Prep = mkPrep illative "asti" ;
stamp_N = mkN "leima" ;
spectator_N = mkN "katsoja" ;
slip_N = mkN "erehdys" ;
self_employed_A = mkA "itsensätyöllistävä" ;
russia_PN = mkPN "Venäjä" ;
remove_V = mkV "poistaa" ;
portugal_PN = mkPN "Portugali" ;
patience_N = mkN "kärsivällisyys" ;
part_time_A = mkA "osa-aikainen" ;
paint_V = mkV "maalata" ;
orange_1_N = mkN "appelsiini" ;
naked_A = mkA "alaston" ;
make_N = mkN "teko" ;
jail_V2 = mkV2 "vangita" ;
imprison_V2 = mkV2 "vangita" ;
hemorrhage_V = mkV (mkV "vuotaa") "verta" ; ----
-- Full-Penn additions, continued. Names prefixed with L. (L.go_V) are
-- qualified references to another module; olla_V below is used unqualified.
durable_N = mkN "kestokulutus" (mkN "hyödyke") ;
burn_V2 = mkV2 "polttaa" ;
atop_Prep = mkPrep "huipulla" ;
alongside_Prep = mkPrep "rinnalla" ;
unpredictable_A = mkA "ennustamaton" ;
preferential_A = mkA "ensisijainen" ;
occupy_V = mkV "vallata" ;
montana_PN = mkPN "Montana" ;
long_range_A = mkA "pitkävaikutteinen" ;
less_than_AdN = mkAdN "alle" ;
irreparable_A = mkA "korjaamaton" ;
high_level_A = mkA "korkeatasoinen" ;
hear_of_V2 = mkV2 "kuulla" elative ;
essay_N = mkN "essee" ;
comment_VS = mkVS "kommentoida" ;
bulgaria_PN = mkPN "Bulgaria" ;
built_in_A = mkA "sisäänrakennettu" ;
beirut_PN = mkPN "Beirut" ;
beat_N = mkN "isku" ;
bankrupt_A = mkA (mkN "konkurssi" (mkN "kypsä")) ;
bangkok_PN = mkPN "Bangkok" ;
alumnus_N = mkN "alumni" ;
alone_A = mkA "yksi" ; -- NOTE(review): "yksi" is the numeral "one"; "alone" is usually "yksin" — confirm
wreckage_N = mkN "romuttuminen" ;
well_intentioned_A = mkA "hyväätarkoittava" ;
weekly_Adv = mkAdv "viikoittain" ;
syria_PN = mkPN "Syyria" ;
st_petersburg_PN = mkPN "Pietari" ;
riot_N = mkN "mellakka" ;
reserve_V = mkV (mkV "esittää") "varaus" ;
repossess_V2 = mkV2 (mkV (mkV "omistaa") "uudelleen") ;
re_examine_V2 = mkV2 (mkV (mkV "tarkastaa") "uudelleen") ;
re_evaluate_V2 = mkV2 (mkV (mkV "arvioida") "uudelleen") ;
primitive_A = mkA "primitiivinen" ;
pass_N = mkN "passi" ;
noble_A = mkA "jalo" ;
nigeria_PN = mkPN "Nigeria" ;
munich_PN = mkPN "München" ;
middle_aged_A = mkA "keski-ikäinen" ;
mate_1_N = mkN "kumppani" ;
marry_V = mkV L.go_V "naimisiin" ;
lack_V = mkV "puuttua" ;
in_place_of_Prep = mkPrep "sijasta" ;
in_lieu_of_Prep = mkPrep "sijasta" ;
high_speed_A = mkA "nopea" ;
high_powered_A = mkA "voimakas" ;
governmental_A = mkA "hallituksellekuuluva" ;
full_fledged_A = mkA "täysimääräinen" ;
fond_A = mkA "kiintynyt" ;
dropout_N = mkN "keskeyttänyt" ;
denmark_PN = mkPN "Tanska" ;
customize_V2 = mkV2 "räätälöidä" ;
cure_1_N = mkN "hoito" ;
co_found_V2 = mkV2 (mkV olla_V "mukana perustamassa") partitive ; -- NOTE(review): olla_V is unqualified, unlike L.go_V above — confirm it is in scope here
champion_V2 = mkV2 "hallita" ; -- NOTE(review): "hallita" means to rule/master; confirm this matches the intended sense of "champion"
burn_N = mkN "palo" ;
bra_N = mkN "rinta" (mkN "liivi") ;
bolt_N = mkN "pultti" ;
beside_Prep = mkPrep "vieressä" ;
aesthetic_A = mkA "esteettinen" ;
-- miscellaneous additions
-- how8much_IDet is given directly as a record: for each case c, s is "kuinka"
-- followed by the singular form (NCase Sg c) of the noun built from
-- exceptNomN (mkN "paljo") "paljon" (presumably "paljo" with the nominative
-- overridden by "paljon" — TODO confirm); number is Sg and isNum is False.
how8much_IDet = {s = \\c => "kuinka" ++ (snoun2nounBind (exceptNomN (mkN "paljo") "paljon")).s ! NCase Sg c ; n = Sg ; isNum = False} ;
-- hang_over_V2: verb "riippua" whose complement is marked by mkPrep "yläpuolella".
hang_over_V2 = mkV2 (mkV "riippua") (mkPrep "yläpuolella") ;
}

View File

@@ -182,7 +182,7 @@ oper
mkV : (huutaa,dan,taa,tavat,takaa,detaan,sin,si,sisi,tanut,dettu,tanee : Str) -> V ; -- worst-case verb: all twelve forms given explicitly
mkV : VK -> V ; -- verb from a DictFin (Kotus) entry
mkV : V -> Str -> V ; -- hakata päälle (particle verb; the Str is the particle)
--- mkV : Str -> V -> V ; -- laimin+lyödä (prefixed verb)
mkV : Str -> V -> V ; -- laimin+lyödä (prefixed verb; the Str is the prefix attached to the verb)
} ;
-- All the patterns above have $nominative$ as subject case.
@@ -604,6 +604,7 @@ mkVS = overload {
huusin,huusi,huusisi,huutanut,huudettu,huutanee : Str) -> V = mk12V ;
-- Verb from a VK entry: forms come from vforms2sverb applied to w.s; the
-- subject case is set to nominative (NPCase Nom) and the particle p is empty.
mkV : (sana : VK) -> V = \w -> vforms2sverb w.s ** {sc = NPCase Nom ; lock_V = <> ; p = []} ;
-- Particle verb: keeps the forms (s), subject case (sc) and h of w unchanged
-- and stores the given string in the particle field p.
mkV : V -> Str -> V = \w,p -> {s = w.s ; sc = w.sc ; lock_V = <> ; h = w.h ; p = p} ;
-- Prefixed verb: glues the string s in front of every form of v (s + v.s ! f);
-- sc, h and p are copied from v.
mkV : Str -> V -> V = \s,v -> {s = \\f => s + v.s ! f ; sc = v.sc ; lock_V = <> ; h = v.h ; p = v.p} ;
} ;
mk1V : Str -> V = \s ->

View File

@@ -196,5 +196,14 @@ Implemented an elementary chunking translator, located in svn://molto-project.eu
For the first time, able to "translate everything" from English to Finnish. The quality is horrible, of course.
3/4
Worked with analysis tools; completed most of the first 300 full-Penn
words (>3) that were still missing in Dict. This resulted in changes in
250 sentences in wsj-3220.
Rough estimate: DictEngFin has 60k words, of which 57.5k come from
WN and 2.5k are manual (based on grep mkW DictEngFin.gf).