diff --git a/lib/src/finnish/stemmed/DictEngFin.gf b/lib/src/finnish/stemmed/DictEngFin.gf index ef88821f9..1defe6989 100644 --- a/lib/src/finnish/stemmed/DictEngFin.gf +++ b/lib/src/finnish/stemmed/DictEngFin.gf @@ -59077,243 +59077,247 @@ oper tehda_V = mkV (lin VK {s = c71 "tehdä"}) ; -- another batch of additions, 29/3/2013 lin -absent_Prep = mkPrep "poissa" elative ; -across_Prep = mkPrep "poikki" ; -alabama_PN = mkPN "Alabama" ; -albania_PN = mkPN "Albania" ; -along_Prep = mkPrep "pitkin" ; -already_AdV = mkAdV "jo" ; -america_PN = mkPN "Amerikka" ; -amid_Prep = mkPrep "keskellä" ; -amp_Conj = mkConj "&" ; -amsterdam_PN = mkPN "Amsterdam" ; -apart_from_Prep = mkPrep "lisäksi" ; -area_1_N = mkN "ala" ; -arizona_PN = mkPN "Arizona" ; -around_Prep = mkPrep "ympärillä" ; -as_Subj = mkSubj "kun" ; -as_long_as_Subj = mkSubj "niin kauan kun" ; -as_well_as_Conj = mkConj "yhtä hyvin kuin" ; -athens_PN = mkPN "Ateena" ; -australia_PN = mkPN "Australia" ; -austria_PN = mkPN (mkN "Itä" (mkN "valta")) ; -bare_A = mkA "paljas" ; -bear_N = mkN "karhu" ; -because_of_Prep = mkPrep "vuoksi" ; -begin_VV = mkVV "alkaa" ; -below_Prep = mkPrep "alapuolella" ; -beneath_Prep = mkPrep "alapuolella" ; -benefit_V = mkV "hyötyä" ; -besides_Prep = mkPrep "rinnalla" ; -beyond_Prep = mkPrep "saavuttamattomissa" ; -bonn_PN = mkPN "Bonn" ; -boston_PN = mkPN "Boston" ; -brazil_PN = mkPN "Brasilia" ; -brew_N = mkN "käymis_tuote" ; -britain_PN = mkPN "Britannia" ; -but_Prep = mkPrep "paitsi" nominative ; -but_Subj = mkSubj "mutta" ; -california_PN = mkPN "Kalifornia" ; -care_N = mkN "hoito" ; -chicago_PN = mkPN "Chicago" ; -china_PN = mkPN "Kiina" ; -close_A = mkA "läheinen" ; -columbia_PN = mkPN "Kolumbia" ; -comment_V = mkV "kommentoida" ; -communist_A = mkA "kommunistinen" ; -complication_N = mkN "komplikaatio" ; -connecticut_PN = mkPN "Connecticut" ; -crystalline_A = mkA "kiteinen" ; -cut_N = mkN "leikkaus" ; -dallas_PN = mkPN "Dallas" ; -default_V = mkV "laiminlyödä" ; -denizen_N = mkN 
"asukas" ; -denver_PN = mkPN "Denver" ; -differ_V = mkV "eritä" ; -disagree_V = mkV vOlla "eri mieltä" ; -discretionary_A = mkA "harkinnanvarainen" ; -discriminatory_A = mkA "erotteleva" ; -duty_free_A = mkA "verovapaa" ; -editorial_A = mkA "toimituksellinen" ; -else_Adv = mkAdv "muutoin" ; -encouragement_N = mkN "rohkaisu" ; -ensure_VS = mkVS "varmistaa" ; -essential_A = mkA "olennainen" ; -europe_PN = mkPN "Eurooppa" ; -even_though_Subj = mkSubj "vaikka" ; -expensive_A = mkA "kallis" ; -fact_finding_A = mkA (mkN "tietoa" (mkN "etsivä")) ; -far_reaching_A = mkA "kauaskantoinen" ; -feel_VS = mkVS "tuntea" ; -feel_V = mkV "tuntea" ; -feude_V = mkV "vihoitella" ; -finland_PN = mkPN "Suomi" ; -first_rate_A = mkA "ensiluokkainen" ; -florida_PN = mkPN "Florida" ; -for_starters_Adv = mkAdv "alkajaisiksi" ; -foreclose_V = mkV (mkV "sulkea") "markkinoilta" ; -four_part_A = mkA "neljänkeskeinen" ; -frankfurt_PN = mkPN "Frankfurt" ; -free_A = mkA "vapaa" ; -funeral_N = mkN "hautajaistilaisuus" ; -further_AdV = mkAdV "edelleen" ; -georgia_PN = mkPN "Georgia" ; -great_A = mkA "suurenmoinen" ; -greece_PN = mkPN "Kreikka" ; -half_Predet = mkPredet "puoliksi" ; -heavy_handed_A = mkA (mkN "raskas" (mkN "kätinen")) ; -help_N = mkN "apu" ; -high_priced_A = mkA "hintava" ; -hollywood_PN = mkPN "Hollywood" ; -honest_A = mkA "rehellinen" ; -hong_kong_PN = mkPN "Hong Kong" ; -hors_de_combat_A = mkA "haavoittunut" ; -hungary_PN = mkPN "Unkari" ; -hyperinflation_N = mkN "hyperinflaatio" ; -in_addition_Adv = mkAdv "lisäksi" ; -in_addition_to_Prep = mkPrep "lisäksi" ; -in_front_of_Prep = mkPrep "edessä" ; -india_PN = mkPN "Intia" ; -indianapolis_PN = mkPN "Indianapolis" ; -indonesia_PN = mkPN "Indonesia" ; -inside_Prep = mkPrep "sisällä" ; -instead_of_Prep = mkPrep "sijasta" ; -intimate_A = mkA "läheinen" ; -israel_PN = mkPN "Israel" ; -japan_PN = mkPN "Japani" ; -japaneseMasc_N = mkN "japanilainen" ; -just_AdV = mkAdV "pelkästään" ; -just_Predet = mkPredet "pelkästään" ; -kansas_PN = mkPN 
"Kansas" ; -key_A = mkA "ratkaiseva" ; -late_A = mkA "myöhäinen" ; -libya_PN = mkPN "Libya" ; -lie_1_V = mkV "maata" ; -lie_2_V = mkV "valehdella" ; -los_angeles_PN = mkPN "Los Angeles" ; -louisiana_PN = mkPN "Louisiana" ; -low_A = mkA "matala" ; -madrid_PN = mkPN "Madrid" ; -make_it_V = mkV "onnistua" ; -mandatory_A = mkA "pakollinen" ; -marketplace_N = mkN "markkinapaikka" ; -marriage_N = mkN "avioliitto" ; -maturity_3_N = mkN "kypsyys" ; -maybe_Adv = mkAdv "ehkä" ; -media_N = mkN "media" ; -mexico_PN = mkPN "Mexico" ; -miami_PN = mkPN "Miami" ; -milan_PN = mkPN "Milano" ; -minneapolis_PN = mkPN "Minneapolis" ; -mississippi_PN = mkPN "Mississippi" ; -mistrial_N = mkN "mistraali" ; -mod_cons_N = mkN "mukavuus" ; -more_than_AdN = mkAdN "yli" ; -moscow_PN = mkPN "Moskova" ; -namibia_PN = mkPN "Namibia" ; -nearby_A = mkA "läheinen" ; -neither7nor_DConj = mkConj "ei" "eikä" ; -never_AdV = mkAdV "koskaan" ; -nevertheless_Adv = mkAdv "kuitenkin" ; -new_york_PN = mkPN "New York" ; -nicaragua_PN = mkPN "Nicaragua" ; -no_longer_AdV = mkAdV "enää" ; -norway_PN = mkPN "Norja" ; -often_AdV = mkAdV "usein" ; -ohio_PN = mkPN "Ohio" ; -oklahoma_PN = mkPN "Oklahoma" ; -old_fashioned_A = mkA "vanhanaikainen" ; -once_Subj = mkSubj "sitten kun" ; -one_time_A = mkA "ainutkertainen" ; -onto_Prep = mkPrep "päälle" ; -optical_A = mkA "optinen" ; -outside_Prep = mkPrep "ulkopuolella" ; -painting_N = mkN "maalaus" ; -pall_N = mkN "paariliina" ; -panama_PN = mkPN "Panama" ; -party_N = mkN "puolue" ; -pend_V = mkV "riippua" ; -pennsylvania_PN = mkPN "Pennsylvania" ; -pent_up_A = mkA "patoutunut" ; -people_N = mkN "kansa" ; -per_Prep = mkPrep "per" nominative ; -perhaps_Adv = mkAdv "ehkä" ; -philippines_PN = mkPN "Filippiinit" ; -pittsburgh_PN = mkPN "Pittsburgh" ; -plastics_N = mkN "muovi" ; -plus_Conj = mkConj "plus" ; -poland_PN = mkPN "Puola" ; -present_day_A = mkA "tämänhetkinen" ; -pretoria_PN = mkPN "Pretoria" ; -pretty_AdA = mkAdA "melkoisen" ; -publishing_A = mkA "julkaiseva" ; 
-quite_AdA = mkAdA "melko" ; -quite_Predet = mkPredet "aika" ; -rank_N = mkN "arvoaste" ; -record_N = mkN "ennätys" ; -regime_1_N = mkN "hallinto" ; -representativeMasc_N = mkN "edustaja" ; -researcherMasc_N = mkN "tutkija" ; -resident_N = mkN "asukas" ; -role_1_N = mkN "rooli" ; -rome_PN = mkPN "Rooma" ; -run_N = mkN "juoksu" ; -san_antonio_PN = mkPN "San Antonio" ; -see_VS = mkVS (mkV "pitää" "huolta") ; -shield_N = mkN "kilpi" ; -sidney_PN = mkPN "Sidney" ; -signale_VS = mkVS "viestittää" ; -since_then_Adv = mkAdv "siitä lähtien" ; -sincere_A = mkA "vilpitön" ; -singapore_PN = mkPN "Singapore" ; -site_N = mkN "sijaintipaikka" ; -so_PConj = mkPConj "niinpä" ; -so_Subj = mkSubj "niin että" ; -so_called_A = mkA "niinsanottu" ; -soft_A = mkA "pehmeä" ; -somehow_AdV = mkAdV "jotenkin" ; -soon_AdV = mkAdV "pian" ; -soon_Adv = mkAdv "pian" ; -spain_PN = mkPN "Espanja" ; -stabilize_V = mkV "vakiinnuttaa" ; -start_V = mkV "aloittaa" ; -start_ing_VV = mkVV "alkaa" ; -start_to_VV = mkVV "alkaa" ; -stockholm_PN = mkPN "Tukholma" ; -strike_N = mkN "lakko" ; -such_Predet = mkPredet "sellainen" ; -such_as_Prep = mkPrep "kuten" nominative ; -sweden_PN = mkPN "Ruotsi" ; -syndicate_N = mkN "syndikaatti" ; -taipei_PN = mkPN "Taipei" ; -taiwan_PN = mkPN "Taiwan" ; -texas_PN = mkPN "Teksas" ; -throughout_Prep = mkPrep "läpi koko" genitive ; -tokyo_PN = mkPN "Tokio" ; -toronto_PN = mkPN "Toronto" ; -tough_A = mkA "tiukka" ; -toward_Prep = mkPrep partitive "kohti" ; -towards_Prep = mkPrep partitive "kohti" ; -turkey_PN = mkPN "Turkki" ; -typical_1_A = mkA "tyypillinen" ; -typical_3_A = mkA "tyypillinen" ; -unheard_of_A = mkA "ennenkuulumaton" ; -unique_A = mkA "ainutlaatuinen" ; -unit_3_N = mkN "yksikkö" ; -universe_N = mkN "niversumi" ; -unless_Subj = mkSubj "ellei" ; -unlike_Prep = mkPrep "erilainen kuin" nominative ; -upon_Prep = mkPrep "päällä" ; -vietnam_PN = mkPN "Vietnam" ; -virginia_PN = mkPN "Virginia" ; -washington_PN = mkPN "Washington" ; -well_known_A = mkA "tunnettu" ; 
-while_Subj = mkSubj "samaan aikaan kuin" ; -white_collar_A = mkA "valkokauluksinen" ; -whole_A = mkA "kokonainen" ; -wyoming_PN = mkPN "Wyoming" ; -yet_AdV = mkAdV "yhä" ; -zurich_PN = mkPN "Zürich" ; +absent_Prep = mkPrep "poissa" elative ; --MAN +across_Prep = mkPrep "poikki" ; --MAN +alabama_PN = mkPN "Alabama" ; --MAN +albania_PN = mkPN "Albania" ; --MAN +along_Prep = mkPrep "pitkin" ; --MAN +already_AdV = mkAdV "jo" ; --MAN +america_PN = mkPN "Amerikka" ; --MAN +amid_Prep = mkPrep "keskellä" ; --MAN +amp_Conj = mkConj "&" ; --MAN +amsterdam_PN = mkPN "Amsterdam" ; --MAN +apart_from_Prep = mkPrep "lisäksi" ; --MAN +area_1_N = mkN "ala" ; --MAN +arizona_PN = mkPN "Arizona" ; --MAN +around_Prep = mkPrep "ympärillä" ; --MAN +as_Subj = mkSubj "kun" ; --MAN +as_long_as_Subj = mkSubj "niin kauan kun" ; --MAN +as_well_as_Conj = mkConj "yhtä hyvin kuin" ; --MAN +athens_PN = mkPN "Ateena" ; --MAN +australia_PN = mkPN "Australia" ; --MAN +austria_PN = mkPN (mkN "Itä" (mkN "valta")) ; --MAN +bare_A = mkA "paljas" ; --MAN +bear_N = mkN "karhu" ; --MAN +because_of_Prep = mkPrep "vuoksi" ; --MAN +begin_VV = mkVV "alkaa" ; --MAN +below_Prep = mkPrep "alapuolella" ; --MAN +beneath_Prep = mkPrep "alapuolella" ; --MAN +benefit_V = mkV "hyötyä" ; --MAN +besides_Prep = mkPrep "rinnalla" ; --MAN +beyond_Prep = mkPrep "saavuttamattomissa" ; --MAN +bonn_PN = mkPN "Bonn" ; --MAN +boston_PN = mkPN "Boston" ; --MAN +brazil_PN = mkPN "Brasilia" ; --MAN +brew_N = mkN "käymis_tuote" ; --MAN +britain_PN = mkPN "Britannia" ; --MAN +but_Prep = mkPrep "paitsi" nominative ; --MAN +but_Subj = mkSubj "mutta" ; --MAN +california_PN = mkPN "Kalifornia" ; --MAN +care_N = mkN "hoito" ; --MAN +chicago_PN = mkPN "Chicago" ; --MAN +china_PN = mkPN "Kiina" ; --MAN +close_A = mkA "läheinen" ; --MAN +columbia_PN = mkPN "Kolumbia" ; --MAN +comment_V = mkV "kommentoida" ; --MAN +communist_A = mkA "kommunistinen" ; --MAN +complication_N = mkN "komplikaatio" ; --MAN +connecticut_PN = mkPN "Connecticut" ; 
--MAN +crystalline_A = mkA "kiteinen" ; --MAN +cut_N = mkN "leikkaus" ; --MAN +dallas_PN = mkPN "Dallas" ; --MAN +default_V = mkV "laiminlyödä" ; --MAN +denizen_N = mkN "asukas" ; --MAN +denver_PN = mkPN "Denver" ; --MAN +differ_V = mkV "eritä" ; --MAN +disagree_V = mkV vOlla "eri mieltä" ; --MAN +discretionary_A = mkA "harkinnanvarainen" ; --MAN +discriminatory_A = mkA "erotteleva" ; --MAN +duty_free_A = mkA "verovapaa" ; --MAN +editorial_A = mkA "toimituksellinen" ; --MAN +else_Adv = mkAdv "muutoin" ; --MAN +encouragement_N = mkN "rohkaisu" ; --MAN +ensure_VS = mkVS "varmistaa" ; --MAN +essential_A = mkA "olennainen" ; --MAN +europe_PN = mkPN "Eurooppa" ; --MAN +even_though_Subj = mkSubj "vaikka" ; --MAN +expensive_A = mkA "kallis" ; --MAN +fact_finding_A = mkA (mkN "tietoa" (mkN "etsivä")) ; --MAN +far_reaching_A = mkA "kauaskantoinen" ; --MAN +feel_VS = mkVS "tuntea" ; --MAN +feel_V = mkV "tuntea" ; --MAN +feude_V = mkV "vihoitella" ; --MAN +finland_PN = mkPN "Suomi" ; --MAN +first_rate_A = mkA "ensiluokkainen" ; --MAN +florida_PN = mkPN "Florida" ; --MAN +for_starters_Adv = mkAdv "alkajaisiksi" ; --MAN +foreclose_V = mkV (mkV "sulkea") "markkinoilta" ; --MAN +four_part_A = mkA "neljänkeskeinen" ; --MAN +frankfurt_PN = mkPN "Frankfurt" ; --MAN +free_A = mkA "vapaa" ; --MAN +funeral_N = mkN "hautajaistilaisuus" ; --MAN +further_AdV = mkAdV "edelleen" ; --MAN +georgia_PN = mkPN "Georgia" ; --MAN +great_A = mkA "suurenmoinen" ; --MAN +greece_PN = mkPN "Kreikka" ; --MAN +half_Predet = mkPredet "puoliksi" ; --MAN +heavy_handed_A = mkA (mkN "raskas" (mkN "kätinen")) ; --MAN +help_N = mkN "apu" ; --MAN +high_priced_A = mkA "hintava" ; --MAN +hollywood_PN = mkPN "Hollywood" ; --MAN +honest_A = mkA "rehellinen" ; --MAN +hong_kong_PN = mkPN "Hong Kong" ; --MAN +hors_de_combat_A = mkA "haavoittunut" ; --MAN +hungary_PN = mkPN "Unkari" ; --MAN +hyperinflation_N = mkN "hyperinflaatio" ; --MAN +in_addition_Adv = mkAdv "lisäksi" ; --MAN +in_addition_to_Prep = mkPrep "lisäksi" 
; --MAN +in_front_of_Prep = mkPrep "edessä" ; --MAN +india_PN = mkPN "Intia" ; --MAN +indianapolis_PN = mkPN "Indianapolis" ; --MAN +indonesia_PN = mkPN "Indonesia" ; --MAN +inside_Prep = mkPrep "sisällä" ; --MAN +instead_of_Prep = mkPrep "sijasta" ; --MAN +intimate_A = mkA "läheinen" ; --MAN +israel_PN = mkPN "Israel" ; --MAN +japan_PN = mkPN "Japani" ; --MAN +japaneseMasc_N = mkN "japanilainen" ; --MAN +just_AdV = mkAdV "pelkästään" ; --MAN +just_Predet = mkPredet "pelkästään" ; --MAN +kansas_PN = mkPN "Kansas" ; --MAN +key_A = mkA "ratkaiseva" ; --MAN +late_A = mkA "myöhäinen" ; --MAN +libya_PN = mkPN "Libya" ; --MAN +lie_1_V = mkV "maata" ; --MAN +lie_2_V = mkV "valehdella" ; --MAN +los_angeles_PN = mkPN "Los Angeles" ; --MAN +louisiana_PN = mkPN "Louisiana" ; --MAN +low_A = mkA "matala" ; --MAN +madrid_PN = mkPN "Madrid" ; --MAN +make_it_V = mkV "onnistua" ; --MAN +mandatory_A = mkA "pakollinen" ; --MAN +marketplace_N = mkN "markkinapaikka" ; --MAN +marriage_N = mkN "avioliitto" ; --MAN +maturity_3_N = mkN "kypsyys" ; --MAN +maybe_Adv = mkAdv "ehkä" ; --MAN +media_N = mkN "media" ; --MAN +mexico_PN = mkPN "Mexico" ; --MAN +miami_PN = mkPN "Miami" ; --MAN +milan_PN = mkPN "Milano" ; --MAN +minneapolis_PN = mkPN "Minneapolis" ; --MAN +mississippi_PN = mkPN "Mississippi" ; --MAN +mistrial_N = mkN "mistraali" ; --MAN +mod_cons_N = mkN "mukavuus" ; --MAN +more_than_AdN = mkAdN "yli" ; --MAN +moscow_PN = mkPN "Moskova" ; --MAN +namibia_PN = mkPN "Namibia" ; --MAN +nearby_A = mkA "läheinen" ; --MAN +neither7nor_DConj = mkConj "ei" "eikä" ; --MAN +never_AdV = mkAdV "koskaan" ; --MAN +nevertheless_Adv = mkAdv "kuitenkin" ; --MAN +new_york_PN = mkPN "New York" ; --MAN +nicaragua_PN = mkPN "Nicaragua" ; --MAN +no_longer_AdV = mkAdV "enää" ; --MAN +norway_PN = mkPN "Norja" ; --MAN +often_AdV = mkAdV "usein" ; --MAN +ohio_PN = mkPN "Ohio" ; --MAN +oklahoma_PN = mkPN "Oklahoma" ; --MAN +old_fashioned_A = mkA "vanhanaikainen" ; --MAN +once_Subj = mkSubj "sitten kun" ; --MAN 
+one_time_A = mkA "ainutkertainen" ; --MAN +onto_Prep = mkPrep "päälle" ; --MAN +optical_A = mkA "optinen" ; --MAN +outside_Prep = mkPrep "ulkopuolella" ; --MAN +painting_N = mkN "maalaus" ; --MAN +pall_N = mkN "paariliina" ; --MAN +panama_PN = mkPN "Panama" ; --MAN +party_N = mkN "puolue" ; --MAN +pend_V = mkV "riippua" ; --MAN +pennsylvania_PN = mkPN "Pennsylvania" ; --MAN +pent_up_A = mkA "patoutunut" ; --MAN +people_N = mkN "kansa" ; --MAN +per_Prep = mkPrep "per" nominative ; --MAN +perhaps_Adv = mkAdv "ehkä" ; --MAN +philippines_PN = mkPN "Filippiinit" ; --MAN +pittsburgh_PN = mkPN "Pittsburgh" ; --MAN +plastics_N = mkN "muovi" ; --MAN +plus_Conj = mkConj "plus" ; --MAN +poland_PN = mkPN "Puola" ; --MAN +present_day_A = mkA "tämänhetkinen" ; --MAN +pretoria_PN = mkPN "Pretoria" ; --MAN +pretty_AdA = mkAdA "melkoisen" ; --MAN +publishing_A = mkA "julkaiseva" ; --MAN +quite_AdA = mkAdA "melko" ; --MAN +quite_Predet = mkPredet "aika" ; --MAN +rank_N = mkN "arvoaste" ; --MAN +record_N = mkN "ennätys" ; --MAN +regime_1_N = mkN "hallinto" ; --MAN +representativeMasc_N = mkN "edustaja" ; --MAN +researcherMasc_N = mkN "tutkija" ; --MAN +resident_N = mkN "asukas" ; --MAN +role_1_N = mkN "rooli" ; --MAN +rome_PN = mkPN "Rooma" ; --MAN +run_N = mkN "juoksu" ; --MAN +san_antonio_PN = mkPN "San Antonio" ; --MAN +see_VS = mkVS (mkV "pitää" "huolta") ; --MAN +shield_N = mkN "kilpi" ; --MAN +sidney_PN = mkPN "Sidney" ; --MAN +signale_VS = mkVS "viestittää" ; --MAN +since_then_Adv = mkAdv "siitä lähtien" ; --MAN +sincere_A = mkA "vilpitön" ; --MAN +singapore_PN = mkPN "Singapore" ; --MAN +site_N = mkN "sijaintipaikka" ; --MAN +so_PConj = mkPConj "niinpä" ; --MAN +so_Subj = mkSubj "niin että" ; --MAN +so_called_A = mkA "niinsanottu" ; --MAN +soft_A = mkA "pehmeä" ; --MAN +somehow_AdV = mkAdV "jotenkin" ; --MAN +soon_AdV = mkAdV "pian" ; --MAN +soon_Adv = mkAdv "pian" ; --MAN +spain_PN = mkPN "Espanja" ; --MAN +stabilize_V = mkV "vakiinnuttaa" ; --MAN +start_V = mkV "aloittaa" 
; --MAN +start_ing_VV = mkVV "alkaa" ; --MAN +start_to_VV = mkVV "alkaa" ; --MAN +stockholm_PN = mkPN "Tukholma" ; --MAN +strike_N = mkN "lakko" ; --MAN +such_Predet = mkPredet "sellainen" ; --MAN +such_as_Prep = mkPrep "kuten" nominative ; --MAN +sweden_PN = mkPN "Ruotsi" ; --MAN +syndicate_N = mkN "syndikaatti" ; --MAN +taipei_PN = mkPN "Taipei" ; --MAN +taiwan_PN = mkPN "Taiwan" ; --MAN +texas_PN = mkPN "Teksas" ; --MAN +throughout_Prep = mkPrep "läpi koko" genitive ; --MAN +tokyo_PN = mkPN "Tokio" ; --MAN +toronto_PN = mkPN "Toronto" ; --MAN +tough_A = mkA "tiukka" ; --MAN +toward_Prep = mkPrep partitive "kohti" ; --MAN +towards_Prep = mkPrep partitive "kohti" ; --MAN +turkey_PN = mkPN "Turkki" ; --MAN +typical_1_A = mkA "tyypillinen" ; --MAN +typical_3_A = mkA "tyypillinen" ; --MAN +unheard_of_A = mkA "ennenkuulumaton" ; --MAN +unique_A = mkA "ainutlaatuinen" ; --MAN +unit_3_N = mkN "yksikkö" ; --MAN +universe_N = mkN "universumi" ; --MAN +unless_Subj = mkSubj "ellei" ; --MAN +unlike_Prep = mkPrep "erilainen kuin" nominative ; --MAN +upon_Prep = mkPrep "päällä" ; --MAN +vietnam_PN = mkPN "Vietnam" ; --MAN +virginia_PN = mkPN "Virginia" ; --MAN +washington_PN = mkPN "Washington" ; --MAN +well_known_A = mkA "tunnettu" ; --MAN +while_Subj = mkSubj "samaan aikaan kuin" ; --MAN +white_collar_A = mkA "valkokauluksinen" ; --MAN +whole_A = mkA "kokonainen" ; --MAN +wyoming_PN = mkPN "Wyoming" ; --MAN +yet_AdV = mkAdV "yhä" ; --MAN +zurich_PN = mkPN "Zürich" ; --MAN + +part_of_N2 = mkN2 (mkN "osa") (mkPrep elative) ; --MAN +idea_of_N2 = mkN2 (mkN "ajatus") (mkPrep elative) ; --MAN +familiar_with_A2 = mkA2 (mkA "perehtynyt") (mkPrep illative) ; --MAN } diff --git a/lib/src/finnish/stemmed/log.txt b/lib/src/finnish/stemmed/log.txt index cde0b5ed9..d423b6c55 100644 --- a/lib/src/finnish/stemmed/log.txt +++ b/lib/src/finnish/stemmed/log.txt @@ -15,6 +15,15 @@ Set up an experiment with 3220 complete trees from Penn prepared by Krasimir. 
Fi around 20 missing syntax constructions, 230 missing words +Tests generated by + + gf -run ~/GF/lib/src/ParseEngFin.pgf 4-eng-fin-wsj.txt + +with + + l -treebank -bind PhrUtt NoPConj (UttS (UseCl (TTAnt TPast ASimul) ... + + 29/3 Added most missing syntax constructions. Some new opers in ParadigmsFin, and 230 more words in DictEngFin: out of 3220 Penn trees now 2721 @@ -26,5 +35,48 @@ After implementing GerundN and GerundNP, only 40 lin with unknowns. But the impl - applying to run-time V prevents correct vowel harmony - composite forms with "minen" should be "mis", e.g. hinnoitteleminendetaljit +Counting funs: + + gf ../GF/lib/src/ParseEng.pgf funs-wsj.txt + +with + + pt -funs PhrUtt NoPConj (UttS (UseCl (TTAnt TPast ASimul) ... + +From this, with some ghci commands, created freq-wsj.txt, showing + +AdvVP 1174 +AdvNP 1075 +UsePron 749 +PossNP 749 +UseV 675 +in_Prep 671 +and_Conj 659 +UseComp 651 +IIDig 620 + +and a total of 4512 funs used in the 3220 trees. + +Then created a list of missing funs in ParseFin: there are 8820 of them. However, only 80 missing funs appear in the corpus! + +some_Quant 72 +anyPl_Det 44 +part_of_N2 34 +both_Det 32 +most_Det 28 +ComplN2 21 +several_Num 19 +another_Quant 19 +UseN2 16 +neither_Det 11 +CNNumNP 8 +draw_V2 7 +aware_of_A2 7 + +The next thing is to find out why ComplN2 and UseN2 are missing - they should be there. +It turned out that this happens just because there was no N2 in the lexicon. Strange... adding just +"part of" and "idea of" (as well as "familiar with") changes 35 sentences. Now only 9 with unknown +constants. 314 without lin. +