From 92f3662b3fd55940f29f081f36d2c6cbea15ce0d Mon Sep 17 00:00:00 2001 From: aarne Date: Wed, 3 Apr 2013 20:27:34 +0000 Subject: [PATCH] 300 full-penn top words in Dict --- lib/src/finnish/stemmed/DictEngFin.gf | 263 +++++++++++++++++++++++- lib/src/finnish/stemmed/ParadigmsFin.gf | 3 +- lib/src/finnish/stemmed/log.txt | 9 + 3 files changed, 269 insertions(+), 6 deletions(-) diff --git a/lib/src/finnish/stemmed/DictEngFin.gf b/lib/src/finnish/stemmed/DictEngFin.gf index 3b5113b07..65e917fbe 100644 --- a/lib/src/finnish/stemmed/DictEngFin.gf +++ b/lib/src/finnish/stemmed/DictEngFin.gf @@ -59099,7 +59099,7 @@ austria_PN = mkPN (mkN "Itä" (mkN "valta")) ; --MAN bare_A = mkA "paljas" ; --MAN bear_N = mkN "karhu" ; --MAN because_of_Prep = mkPrep "vuoksi" ; --MAN -begin_VV = mkVV "alkaa" ; --MAN +begin_VV = mkVV (mkV "alkaa" "alkoi") ; --MAN below_Prep = mkPrep "alapuolella" ; --MAN beneath_Prep = mkPrep "alapuolella" ; --MAN benefit_V = mkV "hyötyä" ; --MAN @@ -59276,8 +59276,8 @@ soon_Adv = mkAdv "pian" ; --MAN spain_PN = mkPN "Espanja" ; --MAN stabilize_V = mkV "vakiinnuttaa" ; --MAN start_V = mkV "aloittaa" ; --MAN -start_ing_VV = mkVV "alkaa" ; --MAN -start_to_VV = mkVV "alkaa" ; --MAN +start_ing_VV = mkVV (mkV "alkaa" "alkoi") ; --MAN +start_to_VV = mkVV (mkV "alkaa" "alkoi") ; --MAN stockholm_PN = mkPN "Tukholma" ; --MAN strike_N = mkN "lakko" ; --MAN such_Predet = mkPredet "sellainen" ; --MAN @@ -59429,7 +59429,7 @@ cause_V2V = mkWV2V (mkV "saada") ; --MANCV2 choose_V2 = mkV2 "valita" ; --MANCV2 come_V2 = mkWV2 (k67 "tulla") illative ; --MANCV2 defend_V2 = mkV2 "puolustaa" partitive ; --MANCV2 -describe_V2 = mkV2 "kuvata" partitive ; --MANCV2 +describe_V2 = mkV2 (mkV "kuvata" "kuvasi") partitive ; --MANCV2 design_V2 = mkV2 "muotoilla" ; --MANCV2 develop_V2 = mkV2 "kehittää" ; --MANCV2 dismiss_V2 = mkV2 (mkV "hylätä" "hylkäsi") ; --MANCV2 @@ -60063,9 +60063,262 @@ yield_N = mkN "tuotos" ; weekend_N = mkN "viikonloppu" ; -- 946 withdrawal_N = mkN "pois" (mkN 
"vetäminen") ; +-- from full penn, down to >3 + +inasmuch_as_Adv = mkAdv "siinä määrin kuin" ; +up_Prep = mkPrep "ylös" partitive ; +whether_Prep = mkPrep "onko" nominative ; +out_of_Prep = mkPrep "ulkopuolella" ; +down_Prep = mkPrep "alas" partitive ; +though_Subj = mkSubj "vaikka" ; +off_Prep = mkPrep "ulkopuolella" ; +plunge_N = mkN "lasku" ; +san_francisco_PN = mkPN "San Francisco" ; +thus_Adv = mkAdv "siten" ; +ever_AdV = mkAdV "koskaan" ; +out_Prep = mkPrep "ulos" elative ; +see_V = mkV "nähdä" ; +price_V2 = mkV2 "hinnoitella" ; +next_Prep = mkPrep elative "seuraava" ; +mind_N = mkN "mieli" ; +as_of_Prep = mkPrep "koskien" partitive ; +achieve_V2 = mkV2 "saavuttaa" ; +duty_N = mkN "velvollisuus" ; +wake_N = mkN "herääminen" ; +ahead_of_Prep = mkPrep "edellä" ; +insure_V2 = mkV2 "vakuuttaa" ; +once_Prep = mkPrep "kerran" nominative ; +white_N = mkN "valkoinen" ; +confident_A = mkA "luottavainen" ; +houston_PN = mkPN "Houston" ; +brain_N = mkN "aivo" ; +approximate_A = mkA "likimääräinen" ; +germany_PN = mkPN "Saksa" ; +though_Prep = mkPrep "vaikka" nominative ; +withdraw_V = mkV "luopua" ; +attend_V2 = mkV2 "osallistua" elative ; +draw_V = mkV "vetää" ; +on_behalf_of_Prep = mkPrep "puolesta" ; +ringer_N = mkN "soittaja" ; +therefore_Adv = mkAdv "siksi" ; +entity_N = mkN "olio" ; +manhattan_PN = mkPN "Manhattan" ; +display_V2 = mkV2 "näyttää" ; +as_for_Prep = mkPrep "mitä tulee" elative ; +seoul_PN = mkPN "Seoul" ; +bushel_N = mkN "busheli" ; +where_Subj = mkSubj "missä" ; +on_top_of_Prep = mkPrep "päälle" ; +lebanon_PN = mkPN (mkN "Libanon" "Libanoneja") ; +michigan_PN = mkPN (mkN "Michigan" "Michiganeja") ; +ensure_V2 = mkV2 "varmistaa" ; +dual_A = mkA "duaalinen" ; +around_AdN = mkAdN "suunnilleen" ; +computerize_V2 = mkV2 "tietokoneistaa" ; +stabilize_V2 = mkV2 "vakiinnuttaa" ; +illinois_PN = mkPN (mkN "Illinois" "Illinoiseja") ; +brussels_PN = mkPN (mkN "Bryssel" "Brysseliä") ; +benefit_V2 = mkV2 "hyödyttää" partitive ; +auction_V2 = mkV2 "huutokaupata" ; 
+assure_V2 = mkV2 "varmistaa" ; +high_grade_A = mkA "korkea-asteinen" ; +allegedly_AdV = mkAdV "väitetysti" ; +alaska_PN = mkPN "Alaska" ; +tip_N = mkN "kärki" ; +package_V2 = mkV2 "pakata" ; +marry_V2 = mkV2 "naida" ; +magnetic_A = mkA "magneettinen" ; +constitute_V2 = mkV2 "muodostaa" ; +massachusetts_PN = mkPN "Massachusetts" ; +initiate_V2 = mkV2 "aloittaa" ; +cuba_PN = mkPN "Kuuba" ; +past_Prep = mkPrep "ohi" ; +manila_PN = mkPN "Manila" ; +apiece_Adv = mkAdv "kappale" ; +saudi_arabia_PN = mkPN "Saudi-Arabia" ; +reversal_N = mkN "kääntö" ; +paint_V2 = mkV2 "maalata" ; +minnesota_PN = mkPN "Minnesota" ; +liquid_A = mkA "nestemäinen" ; +killing_N = mkN "tappaminen" ; +hawaii_PN = mkPN "Havaiji" ; +escape_V2 = mkV2 "paeta" partitive ; +thanks_to_Prep = mkPrep "ansiosta" ; +tax_free_A = mkA "verovapaa" ; +reinvest_V2 = mkV2 (mkV (mkV "investoida") "uudelleen") elative ; +korea_PN = mkPN "Korea" ; +consequent_A = mkA "johdonmukainen" ; +blame_V = mkV "syyttää" ; +vs_Prep = mkPrep "vs" nominative ; +outperform_V2 = mkV2 "voittaa" ; +equip_V2 = mkV2 "varustaa" ; +earmark_V2 = mkV2 (mkV "korva" (mkV "merkitä")) ; +catastrophic_A = mkA "katastrofaalinen" ; +long_distance_A = mkA "pitkämatkalainen" ; +insure_V = mkV "vakuuttaa" ; +hence_Adv = mkAdv "sen vuoksi" ; +full_time_A = mkA "täyspäiväinen" ; +undoubted_A = mkA "epäilemätön" ; +stamford_PN = mkPN "Stamford" ; +escape_V = mkV (mkV "lähteä") "pakoon" ; +cambodia_PN = mkPN "Kambodza" ; +attend_V = mkV "osallistua" ; +worth_Prep = mkPrep "arvoinen" ; +visual_A = mkA "visuaalinen" ; +upward_A = mkA "ylöspäinpyrkivä" ; +stunning_A = mkA "mykistävä" ; +since_Adv = mkAdv "siitä lähtien" ; +shell_N = mkN "kuori" ; +reserve_V2 = mkV2 "varata" ; +pakistan_PN = mkPN "Pakistan" ; +missouri_PN = mkPN "Missouri" ; +free_of_A2 = mkA2 (mkA "vapaa") (mkPrep elative) ; +exempt_A = mkA "vapautettu" ; +carry_N = mkN "kantaa" ; +belgium_PN = mkPN "Belgia" ; +aside_from_Prep = mkPrep "syrjässä" elative ; +argentina_PN = mkPN 
"Argentiina" ; +thailand_PN = mkPN "Thaimaa" ; +snack_N = mkN "välipala" ; +reinstate_V2 = mkV2 (mkV (mkV "asettaa") "uudelleen") ; +peru_PN = mkPN "Peru" ; +panic_V = mkV (mkV "joutua") "paniikkiin" ; +lock_N = mkN "lukko" ; +large_scale_A = mkA "laajamittainen" ; +kentucky_PN = mkPN "Kentucky" ; +delaware_PN = mkPN "Delaware" ; +continental_A = mkA "mannermainen" ; +casual_A = mkA "epäformaali" ; +artistic_A = mkA "taiteellinen" ; +arkansas_PN = mkPN "Arkansas" ; +alert_V2 = mkV2 "varoittaa" partitive ; +alarm_V2 = mkV2 "hälyttää" ; +aboard_Prep = mkPrep "kannella" ; +wedding_N = mkN "vihkiminen" ; +trace_V2 = mkV2 "jäljittää" ; +toilet_N = mkN "käymälä" ; +tide_N = mkN "vuoro" L.water_N ; +tennessee_PN = mkPN "Tennessee" ; +spare_A = mkA "ylijäänyt" ; +saudi_arabian_A = mkA "saudi-arabialainen" ; +reorganize_V2 = mkV2 "uudelleenorganisoida" ; +prior_to_Prep = mkPrep partitive "edeltävä" ; +premise_N = mkN "tila" ; +oversubscribe_V2 = mkV2 "ylibuukata" ; +mandate_N = mkN "mandaatti" ; +in_spite_of_Prep = mkPrep elative "huolimatta" ; +feat_N = mkN "uroteko" ; +debris_1_N = mkN "ylijäämä" ; +crush_N = mkN "murska" ; +conditional_A = mkA "ehdollinen" ; +bleed_V = mkV (mkV "vuotaa") "verta" ; +back_to_Prep = mkPrep "takaisin" elative ; +appreciate_V = mkV "arvostaa" ; +allegedly_AdA = mkAdA "muka" ; +surpass_V2 = mkV2 "ylittää" ; +since_Subj = mkSubj "siitä lähtien kun" ; +short_lived_A = mkA "lyhytikäinen" ; +plumbing_N = mkN "notkahdus" ; +pacific_PN = mkPN (mkN "Tyyni" L.sea_N) ; +organ_N = mkN "elin" ; +occupy_V2 = mkV2 "vallata" ; +notwithstanding_Prep = mkPrep elative "huolimatta" ; +misrepresent_V2 = mkV2 "vääristellä" partitive ; +indiana_PN = mkPN "Indiana" ; +competent_A = mkA "kompetentti" ; +chile_PN = mkPN "Chile" ; +blue_collar_A = mkA "työläishenkinen" ; +bloody_A = mkA "verinen" ; +bloated_A = mkA "paisuteltu" ; +athletic_A = mkA "atleettinen" ; +as_opposed_to_Prep = mkPrep "toisin kuin" nominative ; +appreciate_V2 = mkV2 "arvostaa" partitive ; 
+applause_N = mkN "suosionosoitus" ; +align_V2 = mkV2 (mkV (mkV "asettaa") "rinnakkain") ; +with_respect_to_Prep = mkPrep "suhteessa" illative ; +watt_N = mkN "watti" ; +till_Prep = mkPrep illative "asti" ; +stamp_N = mkN "leima" ; +spectator_N = mkN "katsoja" ; +slip_N = mkN "erehdys" ; +self_employed_A = mkA "itsensätyöllistävä" ; +russia_PN = mkPN "Venäjä" ; +remove_V = mkV "poistaa" ; +portugal_PN = mkPN "Portugali" ; +patience_N = mkN "kärsivällisyys" ; +part_time_A = mkA "osa-aikainen" ; +paint_V = mkV "maalata" ; +orange_1_N = mkN "appelsiini" ; +naked_A = mkA "alaston" ; +make_N = mkN "teko" ; +jail_V2 = mkV2 "vangita" ; +imprison_V2 = mkV2 "vangita" ; +hemorrhage_V = mkV (mkV "vuotaa") "verta" ; ---- +durable_N = mkN "kestokulutus" (mkN "hyödyke") ; +burn_V2 = mkV2 "polttaa" ; +atop_Prep = mkPrep "huipulla" ; +alongside_Prep = mkPrep "rinnalla" ; +unpredictable_A = mkA "ennustamaton" ; +preferential_A = mkA "ensisijainen" ; +occupy_V = mkV "vallata" ; +montana_PN = mkPN "Montana" ; +long_range_A = mkA "pitkävaikutteinen" ; +less_than_AdN = mkAdN "alle" ; +irreparable_A = mkA "korjaamaton" ; +high_level_A = mkA "korkeatasoinen" ; +hear_of_V2 = mkV2 "kuulla" elative ; +essay_N = mkN "essee" ; +comment_VS = mkVS "kommentoida" ; +bulgaria_PN = mkPN "Bulgaria" ; +built_in_A = mkA "sisäänrakennettu" ; +beirut_PN = mkPN "Beirut" ; +beat_N = mkN "isku" ; +bankrupt_A = mkA (mkN "konkurssi" (mkN "kypsä")) ; +bangkok_PN = mkPN "Bangkok" ; +alumnus_N = mkN "alumni" ; +alone_A = mkA "yksi" ; +wreckage_N = mkN "romuttuminen" ; +well_intentioned_A = mkA "hyväätarkoittava" ; +weekly_Adv = mkAdv "viikoittain" ; +syria_PN = mkPN "Syyria" ; +st_petersburg_PN = mkPN "Pietari" ; +riot_N = mkN "mellakka" ; +reserve_V = mkV (mkV "esittää") "varaus" ; +repossess_V2 = mkV2 (mkV (mkV "omistaa") "uudelleen") ; +re_examine_V2 = mkV2 (mkV (mkV "tarkastaa") "uudelleen") ; +re_evaluate_V2 = mkV2 (mkV (mkV "arvioida") "uudelleen") ; +primitive_A = mkA "primitiivinen" ; +pass_N = mkN 
"passi" ; +noble_A = mkA "jalo" ; +nigeria_PN = mkPN "Nigeria" ; +munich_PN = mkPN "München" ; +middle_aged_A = mkA "keski-ikäinen" ; +mate_1_N = mkN "kumppani" ; +marry_V = mkV L.go_V "naimisiin" ; +lack_V = mkV "puuttua" ; +in_place_of_Prep = mkPrep "sijasta" ; +in_lieu_of_Prep = mkPrep "sijasta" ; +high_speed_A = mkA "nopea" ; +high_powered_A = mkA "voimakas" ; +governmental_A = mkA "hallituksellekuuluva" ; +full_fledged_A = mkA "täysimääräinen" ; +fond_A = mkA "kiintynyt" ; +dropout_N = mkN "keskeyttänyt" ; +denmark_PN = mkPN "Tanska" ; +customize_V2 = mkV2 "räätälöidä" ; +cure_1_N = mkN "hoito" ; +co_found_V2 = mkV2 (mkV olla_V "mukana perustamassa") partitive ; +champion_V2 = mkV2 "hallita" ; +burn_N = mkN "palo" ; +bra_N = mkN "rinta" (mkN "liivi") ; +bolt_N = mkN "pultti" ; +beside_Prep = mkPrep "vieressä" ; +aesthetic_A = mkA "esteettinen" ; + + -- miscellaneous additions how8much_IDet = {s = \\c => "kuinka" ++ (snoun2nounBind (exceptNomN (mkN "paljo") "paljon")).s ! NCase Sg c ; n = Sg ; isNum = False} ; - + hang_over_V2 = mkV2 (mkV "riippua") (mkPrep "yläpuolella") ; } diff --git a/lib/src/finnish/stemmed/ParadigmsFin.gf b/lib/src/finnish/stemmed/ParadigmsFin.gf index 29c3a0dca..d646b8cd6 100644 --- a/lib/src/finnish/stemmed/ParadigmsFin.gf +++ b/lib/src/finnish/stemmed/ParadigmsFin.gf @@ -182,7 +182,7 @@ oper mkV : (huutaa,dan,taa,tavat,takaa,detaan,sin,si,sisi,tanut,dettu,tanee : Str) -> V ; -- worst-case verb mkV : VK -> V ; -- verb from DictFin (Kotus) mkV : V -> Str -> V ; -- hakata päälle (particle verb) ---- mkV : Str -> V -> V ; -- laimin+lyödä (prefixed verb) + mkV : Str -> V -> V ; -- laimin+lyödä (prefixed verb) } ; -- All the patterns above have $nominative$ as subject case. 
@@ -604,6 +604,7 @@ mkVS = overload { huusin,huusi,huusisi,huutanut,huudettu,huutanee : Str) -> V = mk12V ; mkV : (sana : VK) -> V = \w -> vforms2sverb w.s ** {sc = NPCase Nom ; lock_V = <> ; p = []} ; mkV : V -> Str -> V = \w,p -> {s = w.s ; sc = w.sc ; lock_V = <> ; h = w.h ; p = p} ; + mkV : Str -> V -> V = \s,v -> {s = \\f => s + v.s ! f ; sc = v.sc ; lock_V = <> ; h = v.h ; p = v.p} ; } ; mk1V : Str -> V = \s -> diff --git a/lib/src/finnish/stemmed/log.txt b/lib/src/finnish/stemmed/log.txt index 51453663f..e92450cd3 100644 --- a/lib/src/finnish/stemmed/log.txt +++ b/lib/src/finnish/stemmed/log.txt @@ -196,5 +196,14 @@ Implemented an elementary chunking translator, located in svn://molto-project.eu For the first time, able to "translate everything" from English to Finnish. The quality is horrible of course. +3/4 + +Worked with analysis tools and completed most of the first 300 full-Penn +words (frequency >3) still missing in Dict. This caused changes in 250 sentences in +wsj-3220. + +Rough estimate: DictEngFin has 60k words, of which 57.5k are from WN and +2.5k are manual (based on grep mkW DictEngFin.gf). +