resource NounMorphoHun = ParamHun ** open Prelude, Predef in {

oper

-- A noun is an inflection table indexed by number and case.
Noun = {s : Number => Case => Str} ;

-- Paradigm functions
-- http://www.cse.chalmers.se/~aarne/articles/smart-preprint.pdf

-- Words like "alma", "kefe": the final vowel is lengthened in every form
-- except the singular nominative.
dAlma : Str -> Noun = \alma ->
  let almá : Str = lengthen alma ;
      -- Apply mkNoun to the lengthened stem "almá" or "kefé"
      nAlmá : Noun = mkNoun almá ;
  in {s = \\n,c => case <n,c> of {
        -- Singular nominative uses the given form, e.g. "alma" or "kefe"
        <Sg,Nom> => alma ;

        -- The rest of the forms are formed with the regular constructor,
        -- using "almá" or "kefé" as the stem.
        _ => nAlmá.s ! n ! c
      }
     } ;

-- Handles words like "madár, nyár, név, bogár" with shortened stem vowel in plural.
-- No special Sg Sup case here.
dMadár : Str -> Noun = \madár ->
  let r = last madár ;
      madá = init madár ;
      mada = shorten madá ; -- shortens the (now final) stem vowel
      a = last mada ;
      -- Linking vowel is a copy of the shortened stem vowel: "madara", "neve"
      madara = mada + r + a ;
      nMadara = mkNounHarm (getHarm madara) "k" madara ;
      nMadár = mkNoun madár ;
  in {s = \\n,c => case <n,c> of {
        -- All plural forms and Sg Acc use the "madara"/"neve" stem
        <Pl,_> | <Sg,Acc> => nMadara.s ! n ! c ;

        -- The rest of the forms are formed with the regular constructor,
        -- using "madár"/"név" as the stem.
        _ => nMadár.s ! n ! c
      }
     } ;

-- Handles words like "ló, lé, kő" which are "lovak, levek, kövek" in plural.
-- "lovon" instead of "lón" is fixed, but that gives the following problems:
-- TODO: special case "lén" not "leven"
-- TODO: also "kövön" not "köven", but that is due to H_e, which is needed
--       for "köveket", so the two requirements conflict.
dLó : Str -> Noun = \ló ->
  let lo = shorten ló ;
      lov = lo + "v" ;
      -- Plural allomorph attached to the v-stem
      ak : Str = case ló of {
        _ + ("ö" | "ő") => "ek" ;
        _ + ("o" | "ó") => "ak" ;
        lé => shorten (last lé) + "k"
      } ;
      harmonyPlural : Harm = case ló of {
        _ + ("ö" | "ő") => H_e ; -- All plural allomorphs have E harmony, singular ones have O.
        _ + ("o" | "ó") => H_a ;
        lé => getHarm lé
      } ;
      nLov = mkNounHarm harmonyPlural ak lov ;
      nLó = mkNoun ló ;
  in {s = \\n,c => case <n,c> of {
        -- All plural forms and Sg Acc, Sg Sup use the "lov" stem
        <Pl,_> | <Sg,Acc> | <Sg,Sup> => nLov.s ! n ! c ;

        -- The rest of the forms are formed with the regular constructor,
        -- using "ló" as the stem.
        _ => nLó.s ! n ! c
      }
     } ;

-- Handles words like "tó, hó" which are "tavak, havak" in plural.
-- (Since I only have these examples for now I do a simplified case with ó, a)
-- "tavon" instead of "tón" is handled: Sg Sup is routed to the "tav" stem
-- and then follows the ordinary Sup rules.
-- TODO: "szó" is a special case: it follows the plural pattern but not the
--       singular accusative one ("szót", not "szavat").
dTó : Str -> Noun = \tó ->
  let t = init tó ;
      tav = t + "av" ;
      nTav = mkNounHarm H_a "ak" tav ;
      nTó = mkNoun tó ;
  in {s = \\n,c => case <n,c> of {
        -- All plural forms, Sg Acc and Sg Sup use the "tav" stem
        <Pl,_> | <Sg,Acc> | <Sg,Sup> => nTav.s ! n ! c ;

        -- The rest of the forms are formed with the regular constructor,
        -- using "tó" as the stem.
        _ => nTó.s ! n ! c
      }
     } ;

-- Handles words like "gyomor, majom, retek" which are "gyomrot, majmot, retket"
-- in accusative (vowel-dropping base).
-- More examples: "ajak, bokor, cukor, csokor, eper, fészek, fodor, gödör, haszon, iker, izom, kölyök, köröm, méreg, piszok, sarok, selyem, szeder, szobor, takony, terem, titok, torok, torony, tükör, vödör" ->
-- "ajkat, bokrot, cukrot, csokrot, epret, fészket, fodrot, gödröt, hasznot, ikret, izmot, kölyköt, körmet, mérget, piszkot, sarkot, selymet, szedret, szobrot, taknyot, termet, titkot, torkot, tornyot, tükröt, vödröt"
-- The Sg Sup case is handled as well.
dMajom : Str -> Noun = \majom ->
  -- Str*Str is syntactic sugar for {p1 : Str ; p2 : Str} ;
  -- confusing syntax: you can't write  let <tako,ny> : Str*Str = …
  -- it has to be called something else, and then you
  -- can get "tako" and "ny" with p1, p2.
  let tako_ny : Str*Str = case majom of {
        -- Split off the final consonant, longest grapheme first
        x + dzs@#trigraph => <x,dzs> ;
        x + zs@#digraph   => <x,zs> ;
        -- ? pattern matches exactly 1 character
        x + s@?           => <x,s>
      } ;
      tako = tako_ny.p1 ;
      ny = tako_ny.p2 ;
      -- Move the stem-final vowel behind the final consonant: "tako"+"ny" -> "taknyo"
      nyo = ny + last tako ;
      tak = init tako ;
      taknyo = tak + nyo ;
      nMajmo = mkNounHarm (getHarm taknyo) "k" taknyo ;
      nMajom = mkNoun majom ;
  in {s = \\n,c => case <n,c> of {
        -- All plural forms and Sg Acc and Sg Sup use the "majmo" stem
        <Pl,_> | <Sg,Acc> | <Sg,Sup> => nMajmo.s ! n ! c ;

        -- The rest of the forms are formed with the regular constructor,
        -- using "majom" as the stem.
        _ => nMajom.s ! n ! c
      }
     } ;

-- Handles words like "sátor, álom, alkalom, farok, halom, vászon"
-- "sátrat, álmat, alkalmat, farkat, halmat, vásznat"
-- (bátor not noun)
dFarok : Str -> Noun = \farok ->
  let k = last farok ;
      -- Drop the last vowel+consonant, then re-attach the consonant: "far" + "k"
      far = init (init farok) ;
      fark = far + k ;
      nFark = mkNounHarm (getHarm fark) "ak" fark ;
      nFarok = mkNoun farok ;
  in {s = \\n,c => case <n,c> of {
        -- All plural forms and Sg Acc and Sg Sup use the "fark" stem
        <Pl,_> | <Sg,Acc> | <Sg,Sup> => nFark.s ! n ! c ;

        -- The rest of the forms are formed with the regular constructor,
        -- using "farok" as the stem.
        _ => nFarok.s ! n ! c
      }
     } ;

-- More words not covered by current paradigms:
-- https://cl.lingfil.uu.se/~bea/publ/megyesi-hungarian.pdf
-- TODO: falu ~ falva-k (v-case)
-- TODO: teher ~ terhet (consonant-crossing)
-- TODO: do we need possessive forms? e.g. fiú ~ fia{m,d,tok}

-- regNoun is a /smart paradigm/: it takes one or a couple of forms,
-- and decides which (non-smart) paradigm is the most likely to match.
regNoun : Str -> Noun = \sgnom ->
  case sgnom of {
    -- The alternatives must be parenthesised: `_ + "a"|"e"` would parse as
    -- `(_ + "a") | "e"` and only match the literal one-letter word "e".
    _ + ("a"|"e")     => dAlma sgnom ;
    _ + ("á"|"é") + ? => dMadár sgnom ;
    _ + ("ó"|"ő")     => dLó sgnom ;
    _ + "alom"        => dFarok sgnom ;
    _ + "elem"        => dMajom sgnom ;

    -- TODO: more non-smart paradigms + more pattern matching
    -- TODO: smart paradigms with >1 form. Which forms are the most descriptive?

    _ => mkNoun sgnom -- Fall back to the regular paradigm
  } ;

--------------------------------------------------------------------------------
-- Following code by EG in 2009 (?), comments and some additions by IL 2020

param
-- Harmony types
Harm = H_a | H_e | H_o ;

oper
-- Vowels as a pattern.
v : pattern Str = #("a" | "e" | "i" | "o" | "u" | "ö" | "ü" |
                    "á" | "é" | "í" | "ó" | "ú" | "ő" | "ű") ;

-- Digraphs
digraph : pattern Str = #("cs"|"dz"|"gy"|"ly"|"ny"|"sz"|"ty"|"zs") ;

-- Trigraphs
trigraph : pattern Str = #("dzs") ;

-- Doubles the final consonant of a string. Multi-letter graphemes are
-- doubled by repeating only their first letter ("ny" -> "nny").
-- The trigraph is tested first so that "dzs" is not taken for "zs".
duplicateLast : Str -> Str = \str ->
  case str of {
    x + "dzs" => x + "ddzs" ;
    x + "ny"  => x + "nny" ; -- takony : takonnyal
    x + "cs"  => x + "ccs" ;
    x + "dz"  => x + "ddz" ;
    x + "gy"  => x + "ggy" ;
    x + "ly"  => x + "lly" ;
    x + "sz"  => x + "ssz" ;
    x + "ty"  => x + "tty" ;
    x + "zs"  => x + "zzs" ;
    -- Base case: just duplicate the single letter
    x + s@?   => x + s + s
  } ;

-- Function to test if a string ends in a vowel
vowFinal : Str -> Bool = \str ->
  case str of {
    _ + #v => True ; -- Matching a string against a pattern.
    _      => False
  } ;

-- Replaces a final short vowel with its long counterpart.
-- Raises a compile-time error for strings that do not end in a short vowel.
lengthen : Str -> Str = \str ->
  case str of {
    x + "a" => x + "á" ;
    x + "e" => x + "é" ;
    x + "i" => x + "í" ;
    x + "o" => x + "ó" ;
    x + "u" => x + "ú" ;
    x + "ö" => x + "ő" ;
    x + "ü" => x + "ű" ;
    -- Parentheses are required: application binds tighter than ++, so
    -- without them the offending string would not be part of the message.
    _ => Predef.error ("Lengthening not applicable to" ++ str)
  } ;

-- Replaces a final long vowel with its short counterpart.
-- Raises a compile-time error for strings that do not end in a long vowel.
shorten : Str -> Str = \str ->
  case str of {
    x + "á" => x + "a" ;
    x + "é" => x + "e" ;
    x + "í" => x + "i" ;
    x + "ó" => x + "o" ;
    x + "ú" => x + "u" ;
    x + "ő" => x + "ö" ;
    x + "ű" => x + "ü" ;
    -- See lengthen for why the argument is parenthesised.
    _ => Predef.error ("Shortening not applicable to" ++ str)
  } ;

-- Function to get a harmony from a string.
-- Any back vowel anywhere in the string gives H_a; otherwise any rounded
-- front vowel gives H_o; everything else is H_e.
getHarm : Str -> Harm = \s ->
  case s of {
    _ + ("a" | "á" | "o" | "ó" | "u" | "ú") + _ => H_a ;
    -- "ű" is the long counterpart of "ü" and belongs to the same class.
    _ + ("ö" | "ő" | "ü" | "ű") + _ => H_o ;
    _ => H_e
  } ;

-- Used as a table of allomorphs for a given case.
HarmForms : Type = Harm => Str ;

-- Functions for constructing a HarmForms table.
-- harm3: a distinct allomorph for each of the three harmony types.
harm3 : Str -> Str -> Str -> HarmForms = \a,e,o ->
  table {
    H_a => a ;
    H_e => e ;
    H_o => o
  } ;

-- harm: H_e and H_o share the same allomorph.
harm : Str -> Str -> HarmForms = \a,e -> harm3 a e e ;

-- harm1: a single allomorph regardless of harmony.
harm1 : Str -> HarmForms = \i -> harm i i ;

-- Variant of case forms when the noun stem ends in consonant.
endCaseCons : Case -> HarmForms = \c ->
  case c of {
    Nom => harm1 [] ;
    Acc => harm3 "ot" "et" "öt" ;
    Dat => harm "nak" "nek" ;
    Ill => harm "ba" "be" ;
    Ine => harm "ban" "ben" ;
    Ela => harm "ból" "ből" ;
    All => harm3 "hoz" "hez" "höz" ;
    Ade => harm "nál" "nél" ;
    Abl => harm "tól" "től" ;
    Sub => harm "ra" "re" ;
    Sup => harm3 "on" "en" "ön" ;
    Del => harm "ról" "ről" ;
    Cau => harm1 "ért" ;
    -- Ins and Tra have no initial consonant here: the caller doubles the
    -- stem-final consonant instead (see mkNounHarmAcc).
    Ins => harm "al" "el" ;
    Tra => harm "á" "é"
    -- Ess => harm "stul" "stül" ; -- Essive-modal 'with N and its parts'
    -- Ter => harm1 "ig" ;         -- Terminative 'as far as N'
    -- For => harm1 "ként" ;       -- Formal 'as N'
    -- Tem => harm1 "kor"          -- Temporal 'at N'. Only used with numerals.
  } ;

-- Variant where accusative has the allomorph -at
endCaseConsAccAt : Case -> HarmForms = \c ->
  case c of {
    Acc => harm3 "at" "et" "öt" ;
    _   => endCaseCons c
  } ;

-- Variant of case forms when the noun stem ends in vowel.
endCaseVow : Case -> HarmForms = \c ->
  case c of {
    Acc => harm1 "t" ;
    Sup => harm1 "n" ;
    Ins => harm "val" "vel" ;
    Tra => harm "vá" "vé" ;
    -- Other forms are shared with endCaseCons.
    _   => endCaseCons c
  } ;

-- Function to return a plural allomorph given the stem (e.g. név, almá).
-- TODO: seems to be many "ak" cases, add?
-- Examples: ág, ágy, ár, díj, fal, fog, gyár, hal, has, hát, ház, hold, láz, lyuk, nyak, olaj, oldal, toll, ujj, vonal
pluralAllomorph : (stem : Str) -> Str = \stem ->
  case vowFinal stem of {
    True  => "k" ;
    False => (harm3 "ok" "ek" "ök") ! (getHarm stem)
  } ;

-- Harmony and plural allomorph read from the singular nominative
mkNoun : Str -> Noun = \w ->
  mkNounHarm (getHarm w) (pluralAllomorph w) w ;

-- Harmony and plural allomorph given explicitly.
-- TODO: check whether passing True (always prefer the -at accusative) is harmful.
mkNounHarm : Harm -> (plural : Str) -> Str -> Noun = mkNounHarmAcc True ;

-- Most general constructor.
--   useAt  : whether consonant-final stems take the -at accusative allomorph
--   h      : harmony used to pick every case allomorph
--   plural : plural allomorph ("k", "ok", "ak", "ek", "ök")
--   w      : stem to which all suffixes are attached
mkNounHarmAcc : (useAt : Bool) -> Harm -> (plural : Str) -> Str -> Noun = \useAt,h,plural,w ->
  let -- Singular endings: vowel-final stems have their own set; consonant-final
      -- stems use -at in the accusative only when requested.
      endCaseSg : Case -> HarmForms = case <vowFinal w, useAt> of {
        <True,_>     => endCaseVow ;
        <False,True> => endCaseConsAccAt ;
        _            => endCaseCons
      } ;

      -- Plural endings always follow a consonant (the plural -k).
      endCasePl : Case -> HarmForms = case <plural,useAt> of {
        <"ak",_> => endCaseConsAccAt ;
        <_,True> => endCaseConsAccAt ;
        _        => endCaseCons
      } ;

      -- Last consonant doubles before instrumental and translative
      duplConsStem : Str = case vowFinal w of {
        True  => w ;
        False => duplicateLast w
      } ;

  -- Noun is {s : Number => Case => Str}, we construct nested tables.
  in {s = table {
        Sg => table {
          -- Double the last letter (if consonant) before Ins, Tra
          c@(Ins|Tra) => duplConsStem + endCaseSg c ! h ;
          c@_         => w + endCaseSg c ! h
        } ;
        Pl => table {
          -- Double the plural k before Ins, Tra
          c@(Ins|Tra) => w + plural + "k" + endCasePl c ! h ;

          -- endCaseCons, because we only use -k as plural morpheme.
          -- If we add possessive forms with allomorph -i, then revise.
          c@_         => w + plural + endCasePl c ! h
        }
      }
     } ;

}