From 2aaccfface92bf84693b858d617b0fd009fb6cb2 Mon Sep 17 00:00:00 2001 From: Inari Listenmaa Date: Sun, 28 Jun 2020 09:47:33 +0200 Subject: [PATCH 1/5] (Hun) Fix harmony of something_NP --- src/hungarian/ResHun.gf | 6 ++++++ src/hungarian/StructuralHun.gf | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/hungarian/ResHun.gf b/src/hungarian/ResHun.gf index cda65dda..21ecd7ba 100644 --- a/src/hungarian/ResHun.gf +++ b/src/hungarian/ResHun.gf @@ -97,6 +97,12 @@ oper objdef = Def ; } ; + defNPPrefix : (p,n : Str) -> Number -> NounPhrase = \vala,mi,n -> emptyNP ** { + s = \\p,c => vala + mkCaseNoun mi ! n ! c ; + n = n ; + objdef = Def ; + } ; + linCN : CNoun -> Str = \cn -> cn.s ! SgNom ++ cn.compl ! Sg ! Nom ++ cn.postmod ; linNP' : Possessor -> Case -> NounPhrase -> Str = \p,c,np -> np.s ! p ! c ++ np.postmod ; linNP : NounPhrase -> Str = linNP' NoPoss Nom ; diff --git a/src/hungarian/StructuralHun.gf b/src/hungarian/StructuralHun.gf index 3e6a4437..08d8babd 100644 --- a/src/hungarian/StructuralHun.gf +++ b/src/hungarian/StructuralHun.gf @@ -87,7 +87,7 @@ lin nobody_NP = mkVerb; "" lin nothing_NP = defNP "" N.NumSg ; lin somebody_NP = defNP "" N.NumSg ; -} -lin something_NP = defNP "valami" Sg ; +lin something_NP = defNPPrefix "vala" "mi" Sg ; -- vowel harmony according to mi ------- -- Prep From 267e457cc26e07769d0b22c75daa6f38e3003de4 Mon Sep 17 00:00:00 2001 From: Inari Listenmaa Date: Sun, 28 Jun 2020 10:35:44 +0200 Subject: [PATCH 2/5] (Hun) Switch: only ik is unreliable, treat other pl suffixes as reliable --- src/hungarian/NounMorphoHun.gf | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/hungarian/NounMorphoHun.gf b/src/hungarian/NounMorphoHun.gf index df8e8a84..cb68d8db 100644 --- a/src/hungarian/NounMorphoHun.gf +++ b/src/hungarian/NounMorphoHun.gf @@ -490,11 +490,8 @@ oper harmFromPlNom : Str -> Harm = \férfiak -> let ak : Str = dp 2 férfiak ; in case ak of { - ("ak"|"ek"|"ok") => getHarm ak ; - - -- For any other suffix, the last two letters aren't reliable. - -- e.g. gumi-gumik has back harmony. - _ => getHarm férfiak + "ik" => getHarm férfiak ; + _ => getHarm ak } ; -- Even more reliable harmony indicator: singular allative From 2cf9c2ff1e813c253f56eefd1edd1be3137902c7 Mon Sep 17 00:00:00 2001 From: Inari Listenmaa Date: Sun, 28 Jun 2020 12:35:05 +0200 Subject: [PATCH 3/5] (Hun) Handle multiword splitting already in Paradigms. --- src/hungarian/NounMorphoHun.gf | 15 ++++++++++ src/hungarian/ParadigmsHun.gf | 52 ++++++++++++++++++++++++---------- 2 files changed, 52 insertions(+), 15 deletions(-) diff --git a/src/hungarian/NounMorphoHun.gf b/src/hungarian/NounMorphoHun.gf index cb68d8db..fb11685c 100644 --- a/src/hungarian/NounMorphoHun.gf +++ b/src/hungarian/NounMorphoHun.gf @@ -629,4 +629,19 @@ oper _ => endCase c } ; + Multiword : Type = {p1,p2 : Str} ; + splitMultiword : Str -> Multiword = \multi_word -> case multi_word of { + x + "-" + y + => ; + v + " " + w + " " + x + " " + y + " " + z + => ; + w + " " + x + " " + y + " " + z + => ; + x + " " + y + " " + z + => ; + y + " " + z + => ; + _ => <"", multi_word> + } ; + } diff --git a/src/hungarian/ParadigmsHun.gf b/src/hungarian/ParadigmsHun.gf index 1887bea3..69f285e7 100644 --- a/src/hungarian/ParadigmsHun.gf +++ b/src/hungarian/ParadigmsHun.gf @@ -122,34 +122,56 @@ oper harmO = ResHun.H_o ; mkN = overload { - mkN : Str -> N = - \s -> lin N (regNoun s) ; + mkN : Str -> N = \s -> + let mw : Multiword = splitMultiword s ; + in case mw of { + => multiwordN pr (regNoun n) + } ; - mkN : Str -> Str -> N = - \n,a-> lin N (regNounNomAcc n a) ; + mkN : Str -> Str -> N = \n,a -> + let mwn : Multiword = splitMultiword n ; + mwa : Multiword = splitMultiword a ; + in multiwordN mwn.p1 (regNounNomAcc mwn.p2 mwa.p2) ; - mkN : Str -> Str -> Str -> N = - \n,a,pln-> lin N (regNounNomAccPl n a pln) ; + mkN : Str -> Str -> Str -> N = \n,a,pln -> + let mwn : Multiword = splitMultiword n ; + mwa : Multiword = splitMultiword a ; + mwpln : Multiword = splitMultiword pln ; + in multiwordN mwn.p1 (regNounNomAccPl mwn.p2 mwa.p2 mwpln.p2) ; - mkN : (x1,_,_,x4 : Str) -> N = - \n,a,pln,possd -> lin N (regNoun4 n a pln possd) ; + mkN : (x1,_,_,x4 : Str) -> N = \n,a,pln,possd -> + let mwn : Multiword = splitMultiword n ; + mwa : Multiword = splitMultiword a ; + mwpln : Multiword = splitMultiword pln ; + mwpossd : Multiword = splitMultiword possd ; + in multiwordN mwn.p1 (regNoun4 mwn.p2 mwa.p2 mwpln.p2 mwpossd.p2) ; - mkN : (unoka : Str) -> (testvér : N) -> N = -- Compound noun: e.g. `mkN "unoka" (mkN "testvér")`. - \prefix,n -> n ** {s = \\x => prefix + n.s ! x} ; + mkN : (unoka : Str) -> (testvér : N) -> N = compoundN ; + --------------------------------------------------------------------- + -- Not in the visible API. TODO remove, improve or document better -- + --------------------------------------------------------------------- + + -- Worst case with 9 strings. mkN : (x1,_,_,_,_,_,_,_,x9 : Str) -> N = \nomsg,accsg,supsg,allsg,nompl,f,g,h,i -> lin N (worstCaseNoun nomsg accsg supsg allsg nompl f g h i (harmFromSgAll allsg)) ; - -- mkN : (férfi : Str) -> (harm : Harmony) -> (ak : Str) -> N ; -- Noun with unpredictable vowel harmony and plural allomorph - mkN : Str -> Harmony -> N = - \s,h -> lin N (mkNounHarm h (pluralAllomorph s) s) ; - + -- Noun with unpredictable vowel harmony and plural allomorph mkN : Str -> (plural : Str) -> Harmony -> N = - \s,pl,h -> lin N (mkNounHarm h pl s) ; + \s,pl,h -> lin N (mkNounHarm h pl s) + } ; + multiwordN : Str -> Noun -> N = + \prefix,n -> case prefix of { + _ + "-" => compoundN prefix n ; + _ => lin N (n ** {s = \\x => prefix ++ n.s ! x}) + } ; + + compoundN : (unoka : Str) -> (testvér : Noun) -> N = -- Compound noun: e.g. `mkN "unoka" (mkN "testvér")`. + \prefix,n -> lin N (n ** {s = \\x => prefix + n.s ! x}) ; mkN2 = overload { mkN2 : Str -> N2 = \s -> lin N2 (regNoun s) ; From 9922d750dbe49bfb9b83a8978ffe9449d000ee31 Mon Sep 17 00:00:00 2001 From: Inari Listenmaa Date: Sun, 28 Jun 2020 12:36:11 +0200 Subject: [PATCH 4/5] (Hun) Handle back harmony words that end in i Also use multiword split just in case some grammar uses these opers directly. --- src/hungarian/NounMorphoHun.gf | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/hungarian/NounMorphoHun.gf b/src/hungarian/NounMorphoHun.gf index fb11685c..e7a4f3a8 100644 --- a/src/hungarian/NounMorphoHun.gf +++ b/src/hungarian/NounMorphoHun.gf @@ -407,6 +407,8 @@ oper front_rounded : pattern Str = #("ö" | "ő" | "ü" | "ű") ; + i : pattern Str = #("i"|"í") ; + -- front and back rounded -- rounded : pattern Str = #("ö" | "ő" | "ü" | "ű" | "o" | "ó" | "u" | "ú") @@ -475,12 +477,11 @@ oper -- Function to get a harmony from any string getHarm : Str -> Harm = \s -> - let lastWord : Str = case s of { - x + " " + y => y ; - _ => s } ; + let lastWord : Str = (splitMultiword s).p2 ; -- only include last word in case lastWord of { - _ + #back + _ => H_a ; - _ + #front_rounded + (#c|"") + (#c|"") => H_o ; + _ + #back + (#c|#i)* => H_a ; -- papír, gumi, zokni: back harmony + -- NB. wrong harmony for farmer. + _ + #front_rounded + (#c)* => H_o ; -- matches nő, not rövid. _ => H_e } ; From 7f2a5bf18fbc17e04a385cffdf074ede72470d04 Mon Sep 17 00:00:00 2001 From: Inari Listenmaa Date: Sun, 28 Jun 2020 14:10:37 +0200 Subject: [PATCH 5/5] (Hun) Handle dashes separately from multiwords --- src/hungarian/NounMorphoHun.gf | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/src/hungarian/NounMorphoHun.gf b/src/hungarian/NounMorphoHun.gf index e7a4f3a8..bb5965c2 100644 --- a/src/hungarian/NounMorphoHun.gf +++ b/src/hungarian/NounMorphoHun.gf @@ -631,18 +631,23 @@ oper } ; Multiword : Type = {p1,p2 : Str} ; - splitMultiword : Str -> Multiword = \multi_word -> case multi_word of { - x + "-" + y - => ; + splitMultiword : Str -> Multiword = \mw -> case mw of { v + " " + w + " " + x + " " + y + " " + z - => ; + => splitDash ; w + " " + x + " " + y + " " + z - => ; + => splitDash ; x + " " + y + " " + z - => ; + => splitDash ; y + " " + z - => ; - _ => <"", multi_word> + => splitDash ; + _ => splitDash <"", mw> } ; + splitDash : Multiword -> Multiword = \mw -> + case mw of { + + => ; + _ => mw + } ; + }