From 5db5964e831d33ad29f73632ed5abc32bf28bffc Mon Sep 17 00:00:00 2001 From: Inari Listenmaa Date: Mon, 20 Apr 2020 14:07:14 +0200 Subject: [PATCH] (Hun) A couple of new paradigms --- src/hungarian/LexiconHun.gf | 10 ++--- src/hungarian/NounMorphoHun.gf | 69 +++++++++++++++++++++++++++++++--- src/hungarian/ParadigmsHun.gf | 8 +++- 3 files changed, 76 insertions(+), 11 deletions(-) diff --git a/src/hungarian/LexiconHun.gf b/src/hungarian/LexiconHun.gf index 80fa8dc9..d69e454b 100644 --- a/src/hungarian/LexiconHun.gf +++ b/src/hungarian/LexiconHun.gf @@ -41,7 +41,7 @@ lin boat_N = mkN "hajó" ; -- lin bone_N = mkN "" ; -- lin boot_N = mkN "" ; -- lin boss_N = mkN "" ; -lin book_N = mkN "könyv" ; +lin book_N = mkN "könyv" harmE ; lin boy_N = mkN "fiú" ; lin bread_N = mkN "kenyér" ; lin break_V2 = mkV2 "szünet" ; @@ -257,7 +257,7 @@ lin old_A = mkA "öreg" ; --also "idős" lin paper_N = mkN "papír" ; -- lin paris_PN = mkPN "Paris" ; lin peace_N = mkN "béke" ; -lin pen_N = mkN "toll" ; +lin pen_N = mkN "toll" "tollat" ; lin person_N = mkN "ember" ; lin planet_N = mkN "bolygó" ; lin plastic_N = mkN "műanyag" ; @@ -383,7 +383,7 @@ lin ugly_A = mkA "csúf" ; -- lin uncertain_A = mkA "" ; -- lin understand_V2 = mkV2 "" ; lin university_N = mkN "egyetem" ; -lin village_N = mkN "falu" ; +lin village_N = mkN "falu" "falut" "falvak"; -- lin vomit_V = mkV2 "" ; -------- @@ -407,11 +407,11 @@ lin wine_N = mkN "bor" ; lin wing_N = mkN "szárny" ; -- lin wipe_V2 = mkV2 "" ; --lin woman_N = mkN "nő" "k" harmO ; -lin woman_N = mkN "nő" "nőket" ; +lin woman_N = mkN "nő" "nőt" ; -- lin wonder_VQ = mkVQ "" ; lin wood_N = mkN "fa" ; --same as tree lin worm_N = mkN "féreg" ; --also "kukac" --- lin write_V2 = mkV2 "" ; +lin write_V2 = mkV2 (mkV "írok" "írsz" "ír" "írunk" "írtok" "írnak" "írni") ; lin year_N = mkN "év" ; lin yellow_A = mkA "sárga" ; lin young_A = mkA "fiatal" ; diff --git a/src/hungarian/NounMorphoHun.gf b/src/hungarian/NounMorphoHun.gf index 40acfc2a..f1a37b9b 100644 --- 
a/src/hungarian/NounMorphoHun.gf +++ b/src/hungarian/NounMorphoHun.gf @@ -61,6 +61,24 @@ oper } ; } ; + -- NB. arguments are Sg Nom, Pl Nom; the second stem is Pl Nom minus its last character + dFalu : (nomsg : Str) -> (nompl : Str) -> Noun = \falu,falvak -> + let falva = init falvak ; + nFalva = mkNoun falva ; + nFalu = mkNoun falu ; + in {s = \\n,c => case of { + + -- Forms matched by the pattern below use the "falva" stem (NOTE(review): the original comment said all plurals, Sg Acc and Sg Sup -- confirm against the pattern) + => nFalva.s ! n ! c ; + -- The rest of the forms are formed with the regular constructor, + -- using "falu" as the stem. + _ => nFalu.s ! n ! c + + } ; + } ; + + + --Handles words like "gyomor, majom, retek" which are "gyomrot, majmot, retket" in accusative (wovel dropping base) --More examples: "ajak, bokor, cukor, csokor, eper, fészek, fodor, gödör, haszon, iker, izom, kölyök, köröm, méreg, piszok, sarok, selyem, szeder, szobor, takony, terem, titok, torok, torony, tükör, vödör" -> -- "ajkat, bokrot, cukrot, csokrot, epret, fészket, fodrot, gödröt, hasznot, ikret, izmot, kölyköt, körmet, mérget, piszkot, sarkot, selymet, szedret, szobrot, taknyot, termet, titkot, torkot, tornyot, tükröt, vödröt" @@ -83,14 +101,40 @@ oper } ; } ; + -- Generic constructor for cases with different stems in Sg Nom and Sg Acc. + -- Assumes that Sg Acc and all plurals use the accusative stem (acc minus its last character), others the Nom stem. + dToll : (nom : Str) -> (acc : Str) -> Noun = \toll,tollat -> + let tolla = init tollat ; + nTolla = mkNoun tolla ; + nToll = mkNoun toll ; + in {s = \\n,c => case of { + -- All plural forms and Sg Acc use the "tolla" stem + | => nTolla.s ! n ! c ; + + -- The rest of the forms are formed with the regular constructor, + -- using "toll" as the stem. + _ => nToll.s ! n ! c + } + } ; + -- More words not covered by current paradigms: -- https://cl.lingfil.uu.se/~bea/publ/megyesi-hungarian.pdf -- TODO: falu ~ falva-k (v-case) -- TODO: teher ~ terhet (consonant-crossing) -- TODO: do we need possessive forms? e.g. 
fiú ~ fia{m,d,tok} - -- regNoun is a /smart paradigm/: it takes one or a couple of forms, + -- All regNoun* are /smart paradigms/: they take one or a couple of forms, -- and decides which (non-smart) paradigm is the most likely to match. +regNounNomAccPl : (nomsg, accsg, nompl : Str) -> Noun = \nsg,asg,npl -> + case of { + <_ + ("u"|"ú"|"ü"|"ű"), -- falu, falvak ; odú, odvak + _ + ("u"|"ú"|"ü"|"ű") + "t", + _ + "v" + #v + "k"> => dFalu nsg npl ; + + -- Fall back to 2-argument smart paradigm + _ => regNounNomAcc nsg asg + } ; + regNounNomAcc : (nom : Str) -> (acc : Str) -> Noun = \n,a -> case of { -- alma, almát @@ -121,7 +165,10 @@ regNounNomAcc : (nom : Str) -> (acc : Str) -> Noun = \n,a -> |<_ + "é", -- lé, levet _ + "e" + #c + #v + "t"> => dLó n a ; - _ => mkNoun n + <_ + #dupl, -- toll, tollat + _ + #dupl + #v + "t"> => dToll n a ; + + _ => dToll n a -- Generic 2-argument constructor } ; regNoun : Str -> Noun = \sgnom -> case sgnom of { @@ -177,11 +224,22 @@ oper v : pattern Str = #("a" | "e" | "i" | "o" | "u" | "ö" | "ü" | "á" | "é" | "í" | "ó" | "ú" | "ő" | "ű") ; + back : pattern Str = #("a" | "á" | "o" | "ó" | "u" | "ú") ; + + front_rounded : pattern Str = #("ö" | "ő" | "ü" | "ű") ; + + -- front and back rounded + -- rounded : pattern Str = #("ö" | "ő" | "ü" | "ű" | "o" | "ó" | "u" | "ú") + c : pattern Str = #("b"|"c"|"d"|"f"|"g"|"h"|"j"|"k"|"l"|"m"| "n"|"p"|"q"|"r"|"s"|"t"|"v"|"w"|"x"|"z"| "cs"|"dz"|"gy"|"ly"|"ny"|"sz"|"ty"|"zs"| "dzs") ; + dupl : pattern Str = #("bb"|"cc"|"dd"|"ff"|"gg"|"hh"|"jj"|"kk"|"ll"|"mm"| + "nn"|"pp"|"qq"|"rr"|"ss"|"tt"|"vv"|"ww"|"xx"|"zz"| + "ddzs"|"ccs"|"ddz"|"ggy"|"lly"|"nny"|"ssz"|"tty"|"zzs") ; + -- Only single consonants unigraph : pattern Str = #("b"|"c"|"d"|"f"|"g"|"h"|"j"|"k"|"l"|"m"| "n"|"p"|"q"|"r"|"s"|"t"|"v"|"w"|"x"|"z") ; @@ -194,14 +252,15 @@ oper duplicateLast : Str -> Str = \str -> case str of { + x + #dupl => str ; -- Don't duplicate an already long consonant (hossz, toll). Must be the first case: cases are tried in order, and the digraph cases below would otherwise match "ssz", "ggy" etc. first x + "dzs" => x + "ddzs" ; - x + "ny" => x + "nny" ; -- takony : takonnyal x + "cs" => x + "ccs" ; x + "dz" => x + "ddz" ; 
x + "gy" => x + "ggy" ; x + "ly" => x + "lly" ; + x + "ny" => x + "nny" ; x + "sz" => x + "ssz" ; x + "ty" => x + "tty" ; x + "zs" => x + "zzs" ; -- Base case: just duplicate the single letter x + s@? => x + s + s } ; @@ -237,8 +296,8 @@ oper -- Function to get a harmony from a string getHarm : Str -> Harm = \s -> case s of { - _ + ("a" | "á" | "o" | "ó" | "u" | "ú") + _ => H_a ; - _ + ("ö" | "ő" | "ü") + _ => H_o ; + _ + #back + _ => H_a ; + _ + #front_rounded + _ => H_o ; _ => H_e } ; diff --git a/src/hungarian/ParadigmsHun.gf b/src/hungarian/ParadigmsHun.gf index da89f54f..ddf4499d 100644 --- a/src/hungarian/ParadigmsHun.gf +++ b/src/hungarian/ParadigmsHun.gf @@ -19,7 +19,8 @@ oper mkN : overload { mkN : (sgnom : Str) -> N ; -- Predictable nouns from singular nominative. Accusative vowel is o for back harmony. No stem lowering (TODO better explanation/examples) - mkN : (sgnom : Str) -> (sggen : Str) -> N ; -- Singular nominative and accusative. Takes care of cases like … TODO example + mkN : (sgnom, sggen : Str) -> N ; -- Singular nominative and accusative. Takes care of cases like … TODO example + mkN : (sgnom, sggen, plnom : Str) -> N ; -- Singular nominative, singular accusative, plural nominative, e.g. `mkN "falu" "falut" "falvak"` mkN : (férfi : Str) -> (harm : Harmony) -> (ak : Str) -> N ; -- Noun with unpredictable vowel harmony and plural allomorph } ; @@ -125,6 +126,9 @@ oper mkN : Str -> Str -> N = \n,a-> lin N (regNounNomAcc n a) ; + mkN : Str -> Str -> Str -> N = + \n,a,pln-> lin N (regNounNomAccPl n a pln) ; + mkN : Str -> Harmony -> N = \s,h -> lin N (mkNounHarm h (pluralAllomorph s) s) ; @@ -164,6 +168,8 @@ oper mkV : (sg3 : Str) -> V = \v -> lin V (mkVerb v) ; -- mkV : (nore : Str) -> (hada : V) -> V = \nore,hada -> hada ** { -- s = \\vf => nore + hada.s ! 
vf} ; + mkV : (x1,_,_,_,_,_,x7 : Str) -> V = \sg1,sg2,sg3,pl1,pl2,pl3,inf -> -- Seven forms: Sg1, Sg2, Sg3, Pl1, Pl2, Pl3, infinitive + lin V (mkVerbFull sg1 sg2 sg3 pl1 pl2 pl3 inf) ; } ; copula = ResHun.copula ;