some new opers in ParadigmsFin, and 200 more words in DictEngFin: out of 3220 Penn trees now 2721 are completely translated (but mostly not so well...)

This commit is contained in:
aarne
2013-03-29 10:13:04 +00:00
parent 27a1811446
commit eb791244bd
5 changed files with 334 additions and 12 deletions

View File

@@ -4,7 +4,7 @@ concrete DictEngFin of DictEngAbs = CatFin ** open ParadigmsFin,
(S = StructuralFin),
(L = LexiconFin),
-- SyntaxFin,
MorphoFin, ParadigmsFin, (X = ConstructX), MakeStructuralFin, Kotus, WNKotus, StemFin, Prelude in {
MorphoFin, ParadigmsFin, Kotus, WNKotus, StemFin, Prelude in {
flags coding=utf8 ;
@@ -59074,6 +59074,247 @@ zymotic_A = mkWA "tsymoosiin" "liittyvä" ;
oper tehda_V = mkV (lin VK {s = c71 "tehdä"}) ;
-- another batch of additions, 29/3/2013
lin
absent_Prep = mkPrep "poissa" elative ;
across_Prep = mkPrep "poikki" ;
alabama_PN = mkPN "Alabama" ;
albania_PN = mkPN "Albania" ;
along_Prep = mkPrep "pitkin" ;
already_AdV = mkAdV "jo" ;
america_PN = mkPN "Amerikka" ;
amid_Prep = mkPrep "keskellä" ;
amp_Conj = mkConj "&" ;
amsterdam_PN = mkPN "Amsterdam" ;
apart_from_Prep = mkPrep "lisäksi" ;
area_1_N = mkN "ala" ;
arizona_PN = mkPN "Arizona" ;
around_Prep = mkPrep "ympärillä" ;
as_Subj = mkSubj "kun" ;
as_long_as_Subj = mkSubj "niin kauan kun" ;
as_well_as_Conj = mkConj "yhtä hyvin kuin" ;
athens_PN = mkPN "Ateena" ;
australia_PN = mkPN "Australia" ;
austria_PN = mkPN (mkN "Itä" (mkN "valta")) ;
bare_A = mkA "paljas" ;
bear_N = mkN "karhu" ;
because_of_Prep = mkPrep "vuoksi" ;
begin_VV = mkVV "alkaa" ;
below_Prep = mkPrep "alapuolella" ;
beneath_Prep = mkPrep "alapuolella" ;
benefit_V = mkV "hyötyä" ;
besides_Prep = mkPrep "rinnalla" ;
beyond_Prep = mkPrep "saavuttamattomissa" ;
bonn_PN = mkPN "Bonn" ;
boston_PN = mkPN "Boston" ;
brazil_PN = mkPN "Brasilia" ;
brew_N = mkN "käymis_tuote" ;
britain_PN = mkPN "Britannia" ;
but_Prep = mkPrep "paitsi" nominative ;
but_Subj = mkSubj "mutta" ;
california_PN = mkPN "Kalifornia" ;
care_N = mkN "hoito" ;
chicago_PN = mkPN "Chicago" ;
china_PN = mkPN "Kiina" ;
close_A = mkA "läheinen" ;
columbia_PN = mkPN "Kolumbia" ;
comment_V = mkV "kommentoida" ;
communist_A = mkA "kommunistinen" ;
complication_N = mkN "komplikaatio" ;
connecticut_PN = mkPN "Connecticut" ;
crystalline_A = mkA "kiteinen" ;
cut_N = mkN "leikkaus" ;
dallas_PN = mkPN "Dallas" ;
default_V = mkV "laiminlyödä" ;
denizen_N = mkN "asukas" ;
denver_PN = mkPN "Denver" ;
differ_V = mkV "eritä" ;
disagree_V = mkV vOlla "eri mieltä" ;
discretionary_A = mkA "harkinnanvarainen" ;
discriminatory_A = mkA "erotteleva" ;
duty_free_A = mkA "verovapaa" ;
editorial_A = mkA "toimituksellinen" ;
else_Adv = mkAdv "muutoin" ;
encouragement_N = mkN "rohkaisu" ;
ensure_VS = mkVS "varmistaa" ;
essential_A = mkA "olennainen" ;
europe_PN = mkPN "Eurooppa" ;
even_though_Subj = mkSubj "vaikka" ;
expensive_A = mkA "kallis" ;
fact_finding_A = mkA (mkN "tietoa" (mkN "etsivä")) ;
far_reaching_A = mkA "kauaskantoinen" ;
feel_VS = mkVS "tuntea" ;
feel_V = mkV "tuntea" ;
feude_V = mkV "vihoitella" ;
finland_PN = mkPN "Suomi" ;
first_rate_A = mkA "ensiluokkainen" ;
florida_PN = mkPN "Florida" ;
for_starters_Adv = mkAdv "alkajaisiksi" ;
foreclose_V = mkV (mkV "sulkea") "markkinoilta" ;
four_part_A = mkA "neljänkeskeinen" ;
frankfurt_PN = mkPN "Frankfurt" ;
free_A = mkA "vapaa" ;
funeral_N = mkN "hautajaistilaisuus" ;
further_AdV = mkAdV "edelleen" ;
georgia_PN = mkPN "Georgia" ;
great_A = mkA "suurenmoinen" ;
greece_PN = mkPN "Kreikka" ;
half_Predet = mkPredet "puoliksi" ;
heavy_handed_A = mkA (mkN "raskas" (mkN "kätinen")) ;
help_N = mkN "apu" ;
high_priced_A = mkA "hintava" ;
hollywood_PN = mkPN "Hollywood" ;
honest_A = mkA "rehellinen" ;
hong_kong_PN = mkPN "Hong Kong" ;
hors_de_combat_A = mkA "haavoittunut" ;
hungary_PN = mkPN "Unkari" ;
hyperinflation_N = mkN "hyperinflaatio" ;
in_addition_Adv = mkAdv "lisäksi" ;
in_addition_to_Prep = mkPrep "lisäksi" ;
in_front_of_Prep = mkPrep "edessä" ;
india_PN = mkPN "Intia" ;
indianapolis_PN = mkPN "Indianapolis" ;
indonesia_PN = mkPN "Indonesia" ;
inside_Prep = mkPrep "sisällä" ;
instead_of_Prep = mkPrep "sijasta" ;
intimate_A = mkA "läheinen" ;
israel_PN = mkPN "Israel" ;
japan_PN = mkPN "Japani" ;
japaneseMasc_N = mkN "japanilainen" ;
just_AdV = mkAdV "pelkästään" ;
just_Predet = mkPredet "pelkästään" ;
kansas_PN = mkPN "Kansas" ;
key_A = mkA "ratkaiseva" ;
late_A = mkA "myöhäinen" ;
libya_PN = mkPN "Libya" ;
lie_1_V = mkV "maata" ;
lie_2_V = mkV "valehdella" ;
los_angeles_PN = mkPN "Los Angeles" ;
louisiana_PN = mkPN "Louisiana" ;
low_A = mkA "matala" ;
madrid_PN = mkPN "Madrid" ;
make_it_V = mkV "onnistua" ;
mandatory_A = mkA "pakollinen" ;
marketplace_N = mkN "markkinapaikka" ;
marriage_N = mkN "avioliitto" ;
maturity_3_N = mkN "kypsyys" ;
maybe_Adv = mkAdv "ehkä" ;
media_N = mkN "media" ;
mexico_PN = mkPN "Meksiko" ; -- Finnish spelling of the country name
miami_PN = mkPN "Miami" ;
milan_PN = mkPN "Milano" ;
minneapolis_PN = mkPN "Minneapolis" ;
mississippi_PN = mkPN "Mississippi" ;
mistrial_N = mkN "mistraali" ; --- NOTE(review): "mistraali" is the mistral wind, not the legal term "mistrial"; translation needs checking
mod_cons_N = mkN "mukavuus" ;
more_than_AdN = mkAdN "yli" ;
moscow_PN = mkPN "Moskova" ;
namibia_PN = mkPN "Namibia" ;
nearby_A = mkA "läheinen" ;
neither7nor_DConj = mkConj "ei" "eikä" ;
never_AdV = mkAdV "koskaan" ;
nevertheless_Adv = mkAdv "kuitenkin" ;
new_york_PN = mkPN "New York" ;
nicaragua_PN = mkPN "Nicaragua" ;
no_longer_AdV = mkAdV "enää" ;
norway_PN = mkPN "Norja" ;
often_AdV = mkAdV "usein" ;
ohio_PN = mkPN "Ohio" ;
oklahoma_PN = mkPN "Oklahoma" ;
old_fashioned_A = mkA "vanhanaikainen" ;
once_Subj = mkSubj "sitten kun" ;
one_time_A = mkA "ainutkertainen" ;
onto_Prep = mkPrep "päälle" ;
optical_A = mkA "optinen" ;
outside_Prep = mkPrep "ulkopuolella" ;
painting_N = mkN "maalaus" ;
pall_N = mkN "paariliina" ;
panama_PN = mkPN "Panama" ;
party_N = mkN "puolue" ;
pend_V = mkV "riippua" ;
pennsylvania_PN = mkPN "Pennsylvania" ;
pent_up_A = mkA "patoutunut" ;
people_N = mkN "kansa" ;
per_Prep = mkPrep "per" nominative ;
perhaps_Adv = mkAdv "ehkä" ;
philippines_PN = mkPN "Filippiinit" ;
pittsburgh_PN = mkPN "Pittsburgh" ;
plastics_N = mkN "muovi" ;
plus_Conj = mkConj "plus" ;
poland_PN = mkPN "Puola" ;
present_day_A = mkA "tämänhetkinen" ;
pretoria_PN = mkPN "Pretoria" ;
pretty_AdA = mkAdA "melkoisen" ;
publishing_A = mkA "julkaiseva" ;
quite_AdA = mkAdA "melko" ;
quite_Predet = mkPredet "aika" ;
rank_N = mkN "arvoaste" ;
record_N = mkN "ennätys" ;
regime_1_N = mkN "hallinto" ;
representativeMasc_N = mkN "edustaja" ;
researcherMasc_N = mkN "tutkija" ;
resident_N = mkN "asukas" ;
role_1_N = mkN "rooli" ;
rome_PN = mkPN "Rooma" ;
run_N = mkN "juoksu" ;
san_antonio_PN = mkPN "San Antonio" ;
see_VS = mkVS (mkV "pitää" "huolta") ;
shield_N = mkN "kilpi" ;
sidney_PN = mkPN "Sidney" ;
signale_VS = mkVS "viestittää" ;
since_then_Adv = mkAdv "siitä lähtien" ;
sincere_A = mkA "vilpitön" ;
singapore_PN = mkPN "Singapore" ;
site_N = mkN "sijaintipaikka" ;
so_PConj = mkPConj "niinpä" ;
so_Subj = mkSubj "niin että" ;
so_called_A = mkA "niinsanottu" ;
soft_A = mkA "pehmeä" ;
somehow_AdV = mkAdV "jotenkin" ;
soon_AdV = mkAdV "pian" ;
soon_Adv = mkAdv "pian" ;
spain_PN = mkPN "Espanja" ;
stabilize_V = mkV "vakiinnuttaa" ;
start_V = mkV "aloittaa" ;
start_ing_VV = mkVV "alkaa" ;
start_to_VV = mkVV "alkaa" ;
stockholm_PN = mkPN "Tukholma" ;
strike_N = mkN "lakko" ;
such_Predet = mkPredet "sellainen" ;
such_as_Prep = mkPrep "kuten" nominative ;
sweden_PN = mkPN "Ruotsi" ;
syndicate_N = mkN "syndikaatti" ;
taipei_PN = mkPN "Taipei" ;
taiwan_PN = mkPN "Taiwan" ;
texas_PN = mkPN "Teksas" ;
throughout_Prep = mkPrep "läpi koko" genitive ;
tokyo_PN = mkPN "Tokio" ;
toronto_PN = mkPN "Toronto" ;
tough_A = mkA "tiukka" ;
toward_Prep = mkPrep partitive "kohti" ;
towards_Prep = mkPrep partitive "kohti" ;
turkey_PN = mkPN "Turkki" ;
typical_1_A = mkA "tyypillinen" ;
typical_3_A = mkA "tyypillinen" ;
unheard_of_A = mkA "ennenkuulumaton" ;
unique_A = mkA "ainutlaatuinen" ;
unit_3_N = mkN "yksikkö" ;
universe_N = mkN "universumi" ; -- restored the missing initial "u"
unless_Subj = mkSubj "ellei" ;
unlike_Prep = mkPrep "erilainen kuin" nominative ;
upon_Prep = mkPrep "päällä" ;
vietnam_PN = mkPN "Vietnam" ;
virginia_PN = mkPN "Virginia" ;
washington_PN = mkPN "Washington" ;
well_known_A = mkA "tunnettu" ;
while_Subj = mkSubj "samaan aikaan kuin" ;
white_collar_A = mkA "valkokauluksinen" ;
whole_A = mkA "kokonainen" ;
wyoming_PN = mkPN "Wyoming" ;
yet_AdV = mkAdV "yhä" ;
zurich_PN = mkPN "Zürich" ;
}

View File

@@ -2,16 +2,13 @@ import qualified Data.Set as S
-- comment out words that are predefined in another lexicon
-- runghc ElimPredef.hs <DictEngFin.gf
-- removeFile = "predef.txt"
-- removeMsg = "PREDEF"
removeFile = "KoeFin.gf"
removeMsg = "MANUAL"
-- also used for temporarily eliminating whatever from compilation
--removeFile = "commentOut"
--removeMsg = "POSTPONE"
removeFile = "t-nouns"
removeMsg = "PLURNOUN"
-- Read the word list named by removeFile, collect the first token of each
-- of its lines into a set, and pass standard input through elimPredef
-- (applied line by line with that set) to standard output.
main = do
  -- 'concatMap (take 1 . words)' instead of 'map (head . words)': a blank
  -- line in the list file contributes nothing rather than failing on 'head []'.
  predefs <- fmap (S.fromList . concatMap (take 1 . words) . lines) (readFile removeFile)
  interact (unlines . map (elimPredef predefs) . lines)

View File

@@ -0,0 +1,29 @@
-- convert annotated word list to GF lexicon
import Data.Char
-- Entry point: every line of standard input is converted into one line of
-- standard output.
main =
  interact (unlines . map convertLine . lines)
  where
    -- split a line into tokens, build the entry tokens, and glue them back
    convertLine = unwords . mkEntry . words
-- Build the tokens of one GF lexicon rule from the tokens of one annotated
-- input line: a bracketed function name followed by its translation.
--
--   [bare_A] paljas                ==>  bare_A = mkA "paljas" ;
--   [absent_Prep] poissa +elative  ==>  absent_Prep = mkPrep "poissa" elative ;
--   [alabama_PN]                   ==>  alabama_PN = mkPN "Alabama" ;
--
-- The paradigm name is "mk" followed by the category, i.e. the part of the
-- function name after its last underscore. For categories starting with 'V'
-- and for "Prep", each translation word becomes its own argument (see
-- quoteIf); for all other categories the translation is one quoted string.
-- If no translation is given, the capitalized function name (without the
-- category suffix) is used as the string.
--
-- A blank input line (empty token list) yields an empty token list, so it
-- is passed through as a blank output line instead of aborting the run.
mkEntry :: [String] -> [String]
mkEntry [] = []
mkEntry (fun_:trans) = [fun, "=", oper, args, ";"] where
  -- unbracket: drop the first and the last character; total on short input
  fun = drop 1 (take (length fun_ - 1) fun_)
  -- split at the LAST underscore by working on the reversed name
  (tac, eman) = span (/= '_') (reverse fun)
  cat  = reverse tac
  name = reverse (drop 1 eman)   -- 'drop 1' is safe when there is no underscore
  oper = "mk" ++ cat
  args = case cat of
    'V':_          -> unwords (map quoteIf trans)
    "Prep"         -> unwords (map quoteIf trans)
    _ | null trans -> quote (mkUpper name)
    _              -> quote (unwords trans)

-- Wrap a string in double quotes (no escaping is performed).
quote :: String -> String
quote s = "\"" ++ s ++ "\""

-- [absent_Prep] poissa +elative
-- A token starting with '+' is emitted verbatim without the '+', i.e. as an
-- identifier; any other token is emitted as a quoted string.
quoteIf :: String -> String
quoteIf s = case s of
  '+':cs -> cs
  _      -> quote s

-- Upper-case the first character; the empty string is returned unchanged.
mkUpper :: String -> String
mkUpper w = case w of
  c:cs -> toUpper c : cs
  _    -> w

View File

@@ -69,6 +69,20 @@ oper
postGenPrep : Str -> Prep ; -- genitive postposition, e.g. "takana"
casePrep : Case -> Prep ; -- just case, e.g. adessive
-- Overloaded constructor for Prep; each variant delegates to one of the
-- specific constructors (casePrep, postGenPrep, postPrep, prePrep).
mkPrep = overload {
  mkPrep : Case -> Prep          -- just a case, as casePrep
    = \c -> casePrep c ;
  mkPrep : Str -> Prep           -- genitive postposition, as postGenPrep, e.g. "takana"
    = \word -> postGenPrep word ;
  mkPrep : Case -> Str -> Prep   -- case first, then word: postPrep, e.g. partitive "kohti"
    = \c,word -> postPrep c word ;
  mkPrep : Str -> Case -> Prep   -- word first, then case: prePrep, e.g. "poissa" elative
    = \word,c -> prePrep c word ;
  } ;
-- Prep value with an empty string, c = NPAcc and isPre = True.
accusative : Prep
  = lin Prep {s = [] ; c = NPAcc ; isPre = True} ;
NK : Type ; -- Noun from DictFin (Kotus)
AK : Type ; -- Adjective from DictFin (Kotus)
VK : Type ; -- Verb from DictFin (Kotus)
@@ -165,6 +179,7 @@ oper
mkV : (huutaa,dan,taa,tavat,takaa,detaan,sin,si,sisi,tanut,dettu,tanee : Str) -> V ; -- worst-case verb
mkV : VK -> V ; -- verb from DictFin (Kotus)
mkV : V -> Str -> V ; -- hakata päälle (particle verb)
--- mkV : Str -> V -> V ; -- laimin+lyödä (prefixed verb)
} ;
-- All the patterns above have $nominative$ as subject case.
@@ -176,6 +191,8 @@ oper
vOlla : V ; -- the verb "be"
olla_V : V
= vOlla ;
--3 Two-place verbs
--
@@ -208,10 +225,23 @@ oper
-- Verbs and adjectives can take complements such as sentences,
-- questions, verb phrases, and adjectives.
-- Verb-complement verbs; both variants go through mkVVf with infFirst.
mkVV = overload {
  mkVV : Str -> VV  -- e.g. "yrittää"
    = \str -> let verb : V = mkV str in mkVVf verb infFirst ;
  mkVV : V -> VV    -- e.g. "alkaa"
    = \verb -> mkVVf verb infFirst ;
  } ;
-- Sentence-complement verbs, built from a string (via mk1V) or from a
-- ready-made verb.
mkVS = overload {
  mkVS : Str -> VS  -- e.g. "väittää"
    = \str -> let verb : V = mk1V str in lin VS verb ;
  mkVS : V -> VS    -- e.g. "sanoa"
    = \verb -> lin VS verb ;
  } ;
mkV0 : V -> V0 ; --%
mkVS : V -> VS ;
mkV2S : V -> Prep -> V2S ; -- e.g. "sanoa" allative
mkVV : V -> VV ; -- e.g. "alkaa"
mkVVf : V -> InfForm -> VV ; -- e.g. "ruveta" infIllat
mkV2V : V -> Prep -> V2V ; -- e.g. "käskeä" genitive
mkV2Vf : V -> Prep -> InfForm -> V2V ; -- e.g. "kieltää" partitive infElat
@@ -233,6 +263,32 @@ oper
V0 : Type ; --%
AS, A2S, AV, A2V : Type ; --%
--2 Structural categories
-- Constructors that wrap a single string into a one-field record of the
-- given category.
mkAdV : Str -> AdV
  = \str -> lin AdV {s = str} ;
mkAdA : Str -> AdA
  = \str -> lin AdA {s = str} ;
mkAdN : Str -> AdN
  = \str -> lin AdN {s = str} ;
mkPConj : Str -> PConj
  = \str -> lin PConj {s = str} ;
mkSubj : Str -> Subj
  = \str -> lin Subj {s = str} ;
-- The same string is used for every combination of the two table parameters.
mkPredet : Str -> Predet -- invariable Predet, such as "vain"
  = \str -> lin Predet {s = \\_,_ => str} ;
-- Conjunctions: s1 is the first part (empty for simple conjunctions),
-- s2 the second part, n the number; the number is Pl unless given.
mkConj = overload {
  mkConj : Str -> Conj                   -- one word, e.g. "plus"
    = \word -> lin Conj {s1 = [] ; s2 = word ; n = Pl} ;
  mkConj : Str -> Str -> Conj            -- two parts, e.g. "ei" "eikä"
    = \first,second -> lin Conj {s1 = first ; s2 = second ; n = Pl} ;
  mkConj : Str -> Str -> Number -> Conj  -- two parts with explicit number
    = \first,second,num -> lin Conj {s1 = first ; s2 = second ; n = num} ;
  } ;
--.
-- The definitions should not bother the user of the API. So they are
-- hidden from the document.
@@ -538,7 +594,7 @@ oper
huutaa,huudan,huutaa,huutavat,huutakaa,huudetaan,
huusin,huusi,huusisi,huutanut,huudettu,huutanee : Str) -> V = mk12V ;
mkV : (sana : VK) -> V = \w -> vforms2sverb w.s ** {sc = NPCase Nom ; lock_V = <> ; p = []} ;
mkV : V -> Str -> V = \w,p -> vforms2sverb w.s ** {sc = NPCase Nom ; lock_V = <> ; p = p} ;
mkV : V -> Str -> V = \w,p -> {s = w.s ; sc = w.sc ; lock_V = <> ; h = w.h ; p = p} ;
} ;
mk1V : Str -> V = \s ->
@@ -645,8 +701,7 @@ oper
dirV3 v p = mkV3 v accPrep (casePrep p) ;
dirdirV3 v = dirV3 v allative ;
mkVS v = v ** {lock_VS = <>} ;
mkVV v = mkVVf v infFirst ;
mkVVf v f = v ** {vi = f ; lock_VV = <>} ;
mkVQ v = v ** {lock_VQ = <>} ;

View File

@@ -1,5 +1,5 @@
concrete StructuralFin of Structural = CatFin **
open MorphoFin, ParadigmsFin, (X = ConstructX), MakeStructuralFin, StemFin, Prelude in {
open MorphoFin, ParadigmsFin, (X = ConstructX), StemFin, Prelude in {
flags optimize=all ;