diff --git a/src/hungarian/LexiconHun.gf b/src/hungarian/LexiconHun.gf
index 46e5a7665..3f3c4f248 100644
--- a/src/hungarian/LexiconHun.gf
+++ b/src/hungarian/LexiconHun.gf
@@ -1,5 +1,5 @@
 concrete LexiconHun of Lexicon = CatHun **
-  open ParadigmsHun in {
+  open ParadigmsHun, ParamHun, Prelude in {
 
 ----
 -- A
@@ -222,7 +222,7 @@ lin house_N = mkN "ház" ;
 
 ----
 -- M
-lin man_N = mkN "férfi" ;
+lin man_N = mkN "férfi" "ak" harmA ; -- force plural allomorph and a-harmony
 -- lin married_A2 = mkA "" ;
 -- lin meat_N = mkN "" ;
 -- lin milk_N = mkN "" ;
diff --git a/src/hungarian/NounMorphoHun.gf b/src/hungarian/NounMorphoHun.gf
new file mode 100644
index 000000000..21861e4c7
--- /dev/null
+++ b/src/hungarian/NounMorphoHun.gf
@@ -0,0 +1,204 @@
+resource NounMorphoHun = ParamHun ** open Prelude, Predef in {
+
+oper
+  Noun = {s : Number => Case => Str} ;
+
+  -- Paradigm functions
+  -- http://www.cse.chalmers.se/~aarne/articles/smart-preprint.pdf
+
+  -- Words like alma, kefe: the final vowel lengthens before every suffix.
+  dAlma : Str -> Noun = \alma ->
+    let almá : Str = lengthen alma ;
+
+        -- Apply mkNoun to the lengthened stem "almá" or "kefé"
+        nAlmá : Noun = mkNoun almá ;
+    in {s = \\n,c => case <n,c> of {
+          -- Singular nominative uses the given form, e.g. "alma" or "kefe"
+          <Sg,Nom> => alma ;
+
+          -- The rest of the forms are formed with the regular constructor,
+          -- using "almá" or "kefé" as the stem.
+          _ => nAlmá.s ! n ! c
+          } ;
+       } ;
+
+  -- Handles words like "madár", "név" with shortened stem vowel in plural
+  dMadár : Str -> Noun = \madár ->
+    let r = last madár ;          -- final consonant, e.g. "r" / "v"
+        madá = init madár ;       -- everything before it, e.g. "madá" / "né"
+        mada = shorten madá ; -- shortens vowels
+        a = last mada ;           -- the shortened vowel doubles as linking vowel
+        madara = mada + r + a ;   -- "madara" / "neve"
+        nMadara = mkNoun madara ;
+        nMadár = mkNoun madár ;
+    in {s = \\n,c => case <n,c> of {
+          -- All plural forms and Sg Acc use the "madara"/"neve" stem
+          <Pl,_> | <Sg,Acc> => nMadara.s ! n ! c ;
+
+          -- The rest of the forms are formed with the regular constructor,
+          -- using "madár"/"név" as the stem.
+          _ => nMadár.s ! n ! c
+
+          } ;
+       } ;
+
+  -- TODO: actual paradigm; for now it just delegates to mkNoun
+  dSör : Str -> Noun = \sör ->
+    let foo : Str = "foo" ; -- placeholder binding, currently unused
+    in mkNoun sör ;
+
+  -- More words not covered by current paradigms:
+  -- https://cl.lingfil.uu.se/~bea/publ/megyesi-hungarian.pdf
+  -- falu ~ falva-k
+  -- gyomor ~ gyomr-ot
+  -- sátor ~ satr-at
+  -- TODO: do we need possessive forms? e.g. fiú ~ fia{m,d,tok}
+
+  -- regNoun is a /smart paradigm/: it takes one or a couple of forms,
+  -- and decides which (non-smart) paradigm is the most likely to match.
+  regNoun : Str -> Noun = \sgnom -> case sgnom of {
+    _ + ("a"|"e") => dAlma sgnom ; -- parenthesised: "|" binds looser than "+"
+    _ + ("á"|"é") + ? => dMadár sgnom ;
+
+    -- TODO: more non-smart paradigms + more pattern matching
+    -- TODO: smart paradigms with >1 form. Which forms are the most descriptive?
+
+    _ => mkNoun sgnom -- Fall back to the regular paradigm
+  } ;
+
+--------------------------------------------------------------------------------
+-- Following code by EG in 2009 (?), comments and some additions by IL 2020
+
+param
+  -- Harmony types
+  Harm = H_a | H_e | H_o ;
+
+oper
+
+  -- Vowels as a pattern.
+  v : pattern Str = #("a" | "e" | "i" | "o" | "u" | "ö" | "ü" |
+                      "á" | "é" | "í" | "ó" | "ú" | "ő" | "ű") ;
+
+  -- Function to test if a string ends in a vowel
+  vowFinal : Str -> Bool = \str ->
+    case str of {
+      _ + #v => True ; -- Matching a string against a pattern.
+      _ => False
+    } ;
+
+  lengthen : Str -> Str = \str -> case str of { -- short final vowel -> long
+    x + "a" => x + "á" ;
+    x + "e" => x + "é" ;
+    x + "i" => x + "í" ;
+    x + "o" => x + "ó" ;
+    x + "u" => x + "ú" ;
+    x + "ö" => x + "ő" ;
+    x + "ü" => x + "ű" ;
+    _ => Predef.error ("Lengthening not applicable to" ++ str)
+  } ;
+
+  shorten : Str -> Str = \str -> case str of { -- long final vowel -> short
+    x + "á" => x + "a" ;
+    x + "é" => x + "e" ;
+    x + "í" => x + "i" ;
+    x + "ó" => x + "o" ;
+    x + "ú" => x + "u" ;
+    x + "ő" => x + "ö" ;
+    x + "ű" => x + "ü" ;
+    _ => Predef.error ("Shortening not applicable to" ++ str)
+  } ;
+
+  -- Function to get a harmony from a string; the first matching branch wins.
+  getHarm : Str -> Harm = \s -> case s of {
+    _ + ("a" | "á" | "o" | "ó" | "u" | "ú") + _ => H_a ;
+    _ + ("ö" | "ő" | "ü" | "ű") + _ => H_o ; -- "ű" added alongside "ő"
+    _ => H_e
+  } ;
+
+  -- Used as a table of allomorphs for a given case, indexed by harmony.
+  HarmForms : Type = Harm => Str ;
+
+  -- Functions for constructing a HarmForms table.
+  harm3 : Str -> Str -> Str -> HarmForms = \a,e,o -> table {
+    H_a => a ;
+    H_e => e ;
+    H_o => o
+  } ;
+  harm : Str -> Str -> HarmForms = \a,e -> harm3 a e e ; -- H_o reuses the H_e form
+  harm1 : Str -> HarmForms = \i -> harm i i ; -- same form for all harmonies
+
+  -- Variant of case forms when the noun stem ends in consonant.
+  endCaseCons : Case -> HarmForms = \c -> case c of {
+    Nom => harm1 [] ;
+    Acc => harm3 "ot" "et" "öt" ;
+    Dat => harm "nak" "nek" ;
+    Ill => harm "ba" "be" ;
+    Ine => harm "ban" "ben" ;
+    Ela => harm "ból" "ből" ;
+    All => harm3 "hoz" "hez" "höz" ;
+    Ade => harm "nál" "nél" ;
+    Abl => harm "tól" "től" ;
+    Sub => harm "ra" "re" ;
+    Sup => harm3 "on" "en" "ön" ;
+    Del => harm "ról" "ről" ;
+    Cau => harm1 "ért" ;
+    Ins => harm "al" "el" ;
+    Tra => harm "á" "é"
+    -- Ess => harm "stul" "stül" ; -- Essive-modal 'with X and its parts'
+    -- Ter => harm1 "ig" ; -- Terminative 'as far as X'
+    -- For => harm1 "ként" ; -- Formal 'as X'
+    -- Tem => harm1 "kor" -- Temporal 'at X'. Only used with numerals.
+  } ;
+
+  -- Variant of case forms when the noun stem ends in vowel.
+  endCaseVow : Case -> HarmForms = \c -> case c of {
+    Acc => harm1 "t" ;        -- one form for all harmonies (cf. "ot"/"et"/"öt" in endCaseCons)
+    Sup => harm1 "n" ;        -- one form for all harmonies (cf. "on"/"en"/"ön" in endCaseCons)
+    Ins => harm "val" "vel" ; -- v-initial, where endCaseCons has "al"/"el"
+    Tra => harm "vá" "vé" ;   -- v-initial, where endCaseCons has "á"/"é"
+
+    -- Other forms are shared with endCaseCons.
+    _ => endCaseCons c
+  } ;
+
+  -- Plural allomorph for a stem (e.g. nev, almá): "k" after a vowel, else "ok"/"ek"/"ök" by getHarm.
+  pluralAllomorph : (stem : Str) -> Str = \stem ->
+    case vowFinal stem of {
+      True => "k" ;
+      False => harm3 "ok" "ek" "ök" ! getHarm stem
+    } ;
+
+
+  -- Harmony and plural allomorph read from the singular nominative
+  mkNoun : Str -> Noun = \w ->
+    mkNounHarm (getHarm w) (pluralAllomorph w) w ;
+
+  -- Harmony and plural allomorph given explicitly
+  mkNounHarm : Harm -> (plural : Str) -> Str -> Noun = \h,plural,w ->
+    let endCase : Case -> HarmForms = case vowFinal w of { -- suffix set for the singular
+          True => endCaseVow ;
+          False => endCaseCons } ;
+
+        -- Last consonant doubles before instrumental and translative
+        lastCons : Str = case vowFinal w of {
+          True => [] ;          -- vowel-final stem: nothing to double
+          False => last w } ;   -- consonant-final stem: repeat the last letter
+
+    -- Noun is {s : Number => Case => Str}, we construct nested tables.
+    in {s = table {
+          Sg => table {
+            -- Double the last letter (if consonant) before Ins, Tra
+            c@(Ins|Tra) => w + lastCons + endCase c ! h ;
+            c@_ => w + endCase c ! h } ;
+
+          Pl => table {
+            -- Double the plural k before Ins, Tra
+            c@(Ins|Tra) => w + plural + "k" + endCaseCons c ! h ;
+
+            -- endCaseCons, because we only use -k as plural morpheme.
+            -- If we add possessive forms with allomorph -i, then revise.
+            c@_ => w + plural + endCaseCons c ! h }
+      }
+    } ;
+
+}
diff --git a/src/hungarian/ParadigmsHun.gf b/src/hungarian/ParadigmsHun.gf
index e0ffa8a35..d4f23d993 100644
--- a/src/hungarian/ParadigmsHun.gf
+++ b/src/hungarian/ParadigmsHun.gf
@@ -9,11 +9,17 @@ oper
 -- should always use these constants instead of the constructors
 -- defined in $ResKor$.
+  Harmony : Type ;
+  harmA : Harmony ;
+  harmE : Harmony ;
+  harmO : Harmony ;
 
 --2 Nouns
 
   mkN : overload {
-    mkN : (noun : Str) -> N ; -- Predictable nouns
+    mkN : (sgnom : Str) -> N ; -- Predictable nouns
+    mkN : (madár : Str) -> (ak : Str) -> N ; -- Noun with unpredictable plural allomorph
+    mkN : (férfi : Str) -> (ak : Str) -> (harm : Harmony) -> N ; -- Noun with unpredictable plural allomorph and vowel harmony
   } ;
 
 --2 Adjectives
@@ -93,13 +99,28 @@ oper
 -- The definitions should not bother the user of the API. So they are
 -- hidden from the document.
 
+  Harmony : Type = ResHun.Harm ;
+  harmA = ResHun.H_a ;
+  harmE = ResHun.H_e ;
+  harmO = ResHun.H_o ;
+
   mkN = overload {
-    mkN : Str -> N = \s -> lin N (mkNoun s) ;
+    mkN : Str -> N = -- smart paradigm picks the declension
+      \s -> lin N (regNoun s) ;
+
+    mkN : Str -> Str -> N = -- plural allomorph given, harmony read from the word
+      \s,ak -> lin N (mkNounHarm (getHarm s) ak s) ;
+
+    mkN : Str -> Harmony -> N = -- NOTE(review): this instance has no entry in the mkN declaration
+      \s,h -> lin N (mkNounHarm h (pluralAllomorph s) s) ;
+
+    mkN : Str -> (plural : Str) -> Harmony -> N = -- argument order used by LexiconHun: word, plural, harmony
+      \s,pl,h -> lin N (mkNounHarm h pl s) ;
   } ;
 
   mkN2 = overload {
-    mkN2 : Str -> N2 = \s -> lin N2 (mkNoun s) ;
+    mkN2 : Str -> N2 = \s -> lin N2 (regNoun s) ;
     mkN2 : N -> N2 = \n -> lin N2 n ;
   } ;
 
diff --git a/src/hungarian/ParamHun.gf b/src/hungarian/ParamHun.gf
index 7ffe3b70b..c50686335 100644
--- a/src/hungarian/ParamHun.gf
+++ b/src/hungarian/ParamHun.gf
@@ -3,14 +3,6 @@ resource ParamHun = ParamX ** open Prelude in {
 --------------------------------------------------------------------------------
 -- Phonology
 
-oper
-  v : pattern Str = #("a" | "e" | "i" | "o" | "u" | "ö" | "ü" |
-                      "á" | "é" | "í" | "ó" | "ú" | "ő" | "ű") ;
-
-  -- not used yet
-  vowFinal : Str -> Bool = \str ->
-    case str of {_ + #v => True ; _ => False} ;
-
 --------------------------------------------------------------------------------
 -- Morphophonology
 
@@ -31,7 +23,6 @@ param
   -- | Tem -- Temporal, e.g. hatkor ‘six o’clock’ (from hat ‘6’)
   ;
 
-  Harm = H_a | H_e | H_o ;
 
   SubjCase = SCNom | SCDat ; -- Limited set of subject cases
 
diff --git a/src/hungarian/ResHun.gf b/src/hungarian/ResHun.gf
index c48a4d5db..d9790d57d 100644
--- a/src/hungarian/ResHun.gf
+++ b/src/hungarian/ResHun.gf
@@ -5,71 +5,15 @@
 -- This module contains operations that are needed to make the
 -- resource syntax work.
 -- Some parameters, such as $Number$, are inherited from $ParamX$.
-resource ResHun = ParamHun ** open Prelude, Predef, ParamHun in {
+resource ResHun = NounMorphoHun ** open Prelude, Predef in {
 
 --------------------------------------------------------------------------------
--- Nouns
-oper
-  Noun = {s : Number => Case => Str} ;
-
-  endCase : Case -> HarmForms = \c -> case c of {
-    Nom => harm1 [] ;
-    Acc => harm3 "ot" "et" "öt" ;
-    Dat => harm "nak" "nek" ;
-    Ill => harm "ba" "be" ;
-    Ine => harm "ban" "ben" ;
-    Ela => harm "ból" "ből" ;
-    All => harm3 "hoz" "hez" "höz" ;
-    Ade => harm "nál" "nél" ;
-    Abl => harm "tól" "től" ;
-    Sub => harm "ra" "re" ;
-    Sup => harm3 "on" "en" "ön" ;
-    Del => harm "ról" "ről" ;
-    Ins => harm "al" "el" ;
-    Cau => harm1 "ért" ;
-    Tra => harm "á" "é" -- TODO consonant assimilation
-    -- Ess => harm "stul" "stül" ;
-    -- Ter => harm1 "ig" ;
-    -- For => harm1 "ként" ;
-    -- Tem => harm1 "kor"
-  } ;
-
-  endNumber : Number -> HarmForms = \n -> case n of {
-    Sg => harm1 [] ;
-    Pl => harm3 "ok" "ek" "ök" -- TODO: vowel assimilation
-  } ;
-
-  harm3 : Str -> Str -> Str -> HarmForms = \a,e,o -> <a,e,o> ;
-  harm : Str -> Str -> HarmForms = \a,e -> harm3 a e e ;
-  harm1 : Str -> HarmForms = \i -> harm i i ;
-
-  getHarm : Str -> Harm = \s -> case s of {
-    _ + ("a" | "á" | "o" | "ó" | "u" | "ú") + _ => H_a ;
-    _ + ("ö" | "ő" | "ü") + _ => H_o ;
-    _ => H_e
-  } ;
-
-  HarmForms : Type = Str * Str * Str ;
-
-  useHarm : Harm -> HarmForms -> Str = \h,ss -> case h of {
-    H_a => ss.p1 ;
-    H_e => ss.p2 ;
-    H_o => ss.p3
-  } ;
-
-  putHarmEnding : HarmForms -> Str -> Str = \hs,w ->
-    w + useHarm (getHarm w) hs ;
-
-  mkNoun : Str -> Noun = \w -> {
-    s = \\n,c =>
-      let h = getHarm w
-      in
-        w + useHarm h (endNumber n) + useHarm h (endCase c)
-  } ;
-
 ----------------------------------------------
 -- NP
 
+-- Noun morphology is in NounMorphoHun
+
+oper
+
   NounPhrase : Type = {
     s : Case => Str ;
     agr : Person*Number ;
@@ -90,8 +34,7 @@ oper
 -- Pronouns
 
   Pronoun : Type = NounPhrase ** {
-    -- poss : { -- for PossPron : Pron -> Quant
-    -- } ;
+    --poss : Str ; -- for PossPron : Pron -> Quant
   } ;
 
 --------------------------------------------------------------------------------
@@ -165,8 +108,10 @@ oper
 
   mkAdj : Str -> Adjective = \sg -> {
     s = \\n =>
-      let h = getHarm sg
-       in sg + useHarm h (endNumber n)
+      let plural = case n of {
+            Sg => [] ;
+            Pl => pluralAllomorph sg }
+       in sg + plural
   } ;
 
 --------------------------------------------------------------------------------
@@ -201,12 +146,12 @@ oper
   mkVerb : (sg3 : Str) -> Verb = mkVerbReg "TODO:infinitive" ; -- TODO
 
   mkVerbReg : (inf, sg3 : Str) -> Verb = \inf,sg3 ->
-    let harmony : Harm = getHarm sg3 ;
-        sg1 : Str = sg3 + useHarm harmony (verbEndings!<P1,Sg>) ;
+    let h : Harm = getHarm sg3 ; -- NOTE(review): <Person,Number> selectors below restored by inference; confirm against verbEndings
+        sg1 : Str = sg3 + verbEndings ! <P1,Sg> ! h ;
         sg2 : Str = sg3 + "sz" ;
-        pl1 : Str = sg3 + useHarm harmony (verbEndings!<P1,Pl>) ;
-        pl2 : Str = sg3 + useHarm harmony (verbEndings!<P2,Pl>) ;
-        pl3 : Str = sg3 + useHarm harmony (verbEndings!<P3,Pl>) ;
+        pl1 : Str = sg3 + verbEndings ! <P1,Pl> ! h ;
+        pl2 : Str = sg3 + verbEndings ! <P2,Pl> ! h ;
+        pl3 : Str = sg3 + verbEndings ! <P3,Pl> ! h ;
     in mkVerbFull sg1 sg2 sg3 pl1 pl2 pl3 inf ;
 
   mkVerbFull : (x1,_,_,_,_,_,x7 : Str) -> Verb =
diff --git a/src/hungarian/StructuralHun.gf b/src/hungarian/StructuralHun.gf
index 3e1508551..945a18ba1 100644
--- a/src/hungarian/StructuralHun.gf
+++ b/src/hungarian/StructuralHun.gf
@@ -128,6 +128,7 @@ lin under_Prep = mkPrep "alatt" ;
       nonExist ; -- Translative
     agr = <P1,Sg> ;
     isPron = True ;
+    poss = "em" ;
   } ;
 
   youPol_Pron, youSg_Pron = emptyNP ** {
@@ -140,6 +141,7 @@ lin under_Prep = mkPrep "alatt" ;
       nonExist ; -- Translative
     agr = <P2,Sg> ;
     isPron = True ;
+    poss = "d" ;
   } ;
 
   he_Pron, she_Pron = emptyNP ** {