Merge pull request #346 from inariksit/hungarian

Hungarian: vowel harmony + handle multiwords
This commit is contained in:
Inari Listenmaa
2020-06-28 14:14:43 +02:00
committed by GitHub
4 changed files with 72 additions and 26 deletions

View File

@@ -407,6 +407,8 @@ oper
front_rounded : pattern Str = #("ö" | "ő" | "ü" | "ű") ; front_rounded : pattern Str = #("ö" | "ő" | "ü" | "ű") ;
i : pattern Str = #("i"|"í") ;
-- front and back rounded -- front and back rounded
-- rounded : pattern Str = #("ö" | "ő" | "ü" | "ű" | "o" | "ó" | "u" | "ú") -- rounded : pattern Str = #("ö" | "ő" | "ü" | "ű" | "o" | "ó" | "u" | "ú")
@@ -475,12 +477,11 @@ oper
-- Function to get a harmony from any string -- Function to get a harmony from any string
getHarm : Str -> Harm = \s -> getHarm : Str -> Harm = \s ->
let lastWord : Str = case s of { let lastWord : Str = (splitMultiword s).p2 ; -- only include last word
x + " " + y => y ;
_ => s } ;
in case lastWord of { in case lastWord of {
_ + #back + _ => H_a ; _ + #back + (#c|#i)* => H_a ; -- papír, gumi, zokni: back harmony
_ + #front_rounded + (#c|"") + (#c|"") => H_o ; -- NB. wrong harmony for farmer.
_ + #front_rounded + (#c)* => H_o ; -- matches nő, not rövid.
_ => H_e _ => H_e
} ; } ;
@@ -490,11 +491,8 @@ oper
harmFromPlNom : Str -> Harm = \férfiak -> harmFromPlNom : Str -> Harm = \férfiak ->
let ak : Str = dp 2 férfiak ; let ak : Str = dp 2 férfiak ;
in case ak of { in case ak of {
("ak"|"ek"|"ok") => getHarm ak ; "ik" => getHarm férfiak ;
_ => getHarm ak
-- For any other suffix, the last two letters aren't reliable.
-- e.g. gumi-gumik has back harmony.
_ => getHarm férfiak
} ; } ;
-- Even more reliable harmony indicator: singular allative -- Even more reliable harmony indicator: singular allative
@@ -632,4 +630,24 @@ oper
_ => endCase c _ => endCase c
} ; } ;
-- A string split in two: p1 is the leading part (prefix), p2 is the final part.
Multiword : Type = {p1,p2 : Str} ;
-- Split a string at its spaces into a prefix (p1) and a final word (p2),
-- then hand the pair to splitDash.
-- The patterns are tried from four spaces down to one, so for strings of up to
-- five space-separated words p2 is exactly the last word.
-- A string without any space gets the empty string as p1.
-- In the prefix, the words are joined with ++ and the last of them carries
-- a trailing space inside its token.
splitMultiword : Str -> Multiword = \phrase ->
  let parts : Multiword = case phrase of {
        a + " " + b + " " + c + " " + d + " " + last
          => <a ++ b ++ c ++ d + " ", last> ;
        b + " " + c + " " + d + " " + last
          => <b ++ c ++ d + " ", last> ;
        c + " " + d + " " + last
          => <c ++ d + " ", last> ;
        d + " " + last
          => <d + " ", last> ;
        _ => <"", phrase>
      } ;
   in splitDash parts ;
-- If the second component contains a hyphen, move everything up to and
-- including that hyphen onto the end of the first component.
-- A pair whose second component has no hyphen is returned unchanged.
splitDash : Multiword -> Multiword = \pair ->
  case pair.p2 of {
    stem + "-" + rest => <pair.p1 ++ stem + "-", rest> ;
    _ => pair
  } ;
} }

View File

@@ -122,34 +122,56 @@ oper
harmO = ResHun.H_o ; harmO = ResHun.H_o ;
mkN = overload { mkN = overload {
mkN : Str -> N = mkN : Str -> N = \s ->
\s -> lin N (regNoun s) ; let mw : Multiword = splitMultiword s ;
in case mw of {
<pr,n> => multiwordN pr (regNoun n)
} ;
mkN : Str -> Str -> N = mkN : Str -> Str -> N = \n,a ->
\n,a-> lin N (regNounNomAcc n a) ; let mwn : Multiword = splitMultiword n ;
mwa : Multiword = splitMultiword a ;
in multiwordN mwn.p1 (regNounNomAcc mwn.p2 mwa.p2) ;
mkN : Str -> Str -> Str -> N = mkN : Str -> Str -> Str -> N = \n,a,pln ->
\n,a,pln-> lin N (regNounNomAccPl n a pln) ; let mwn : Multiword = splitMultiword n ;
mwa : Multiword = splitMultiword a ;
mwpln : Multiword = splitMultiword pln ;
in multiwordN mwn.p1 (regNounNomAccPl mwn.p2 mwa.p2 mwpln.p2) ;
mkN : (x1,_,_,x4 : Str) -> N = mkN : (x1,_,_,x4 : Str) -> N = \n,a,pln,possd ->
\n,a,pln,possd -> lin N (regNoun4 n a pln possd) ; let mwn : Multiword = splitMultiword n ;
mwa : Multiword = splitMultiword a ;
mwpln : Multiword = splitMultiword pln ;
mwpossd : Multiword = splitMultiword possd ;
in multiwordN mwn.p1 (regNoun4 mwn.p2 mwa.p2 mwpln.p2 mwpossd.p2) ;
mkN : (unoka : Str) -> (testvér : N) -> N = -- Compound noun: e.g. `mkN "unoka" (mkN "testvér")`. mkN : (unoka : Str) -> (testvér : N) -> N = compoundN ;
\prefix,n -> n ** {s = \\x => prefix + n.s ! x} ;
---------------------------------------------------------------------
-- Not in the visible API. TODO remove, improve or document better --
---------------------------------------------------------------------
-- Worst case with 9 strings.
mkN : (x1,_,_,_,_,_,_,_,x9 : Str) -> N = mkN : (x1,_,_,_,_,_,_,_,x9 : Str) -> N =
\nomsg,accsg,supsg,allsg,nompl,f,g,h,i -> \nomsg,accsg,supsg,allsg,nompl,f,g,h,i ->
lin N (worstCaseNoun nomsg accsg supsg allsg nompl lin N (worstCaseNoun nomsg accsg supsg allsg nompl
f g h i (harmFromSgAll allsg)) ; f g h i (harmFromSgAll allsg)) ;
-- mkN : (férfi : Str) -> (harm : Harmony) -> (ak : Str) -> N ; -- Noun with unpredictable vowel harmony and plural allomorph -- Noun with unpredictable vowel harmony and plural allomorph
mkN : Str -> Harmony -> N =
\s,h -> lin N (mkNounHarm h (pluralAllomorph s) s) ;
mkN : Str -> (plural : Str) -> Harmony -> N = mkN : Str -> (plural : Str) -> Harmony -> N =
\s,pl,h -> lin N (mkNounHarm h pl s) ; \s,pl,h -> lin N (mkNounHarm h pl s)
} ; } ;
-- Put a prefix in front of every form of a noun.
-- A prefix ending in a hyphen is passed to compoundN;
-- any other prefix is prepended with ++, i.e. as a separate token.
multiwordN : Str -> Noun -> N = \pre,noun ->
  case pre of {
    _ + "-" => compoundN pre noun ;
    _ => lin N (noun ** {s = \\form => pre ++ noun.s ! form})
  } ;
-- Compound noun: e.g. `mkN "unoka" (mkN "testvér")`.
-- The prefix is glued with + onto every form of the noun;
-- all other fields of the noun are kept as they are.
compoundN : (unoka : Str) -> (testvér : Noun) -> N = \pre,head ->
  lin N (head ** {s = \\form => pre + head.s ! form}) ;
mkN2 = overload { mkN2 = overload {
mkN2 : Str -> N2 = \s -> lin N2 (regNoun s) ; mkN2 : Str -> N2 = \s -> lin N2 (regNoun s) ;

View File

@@ -97,6 +97,12 @@ oper
objdef = Def ; objdef = Def ;
} ; } ;
-- Noun phrase made of a fixed prefix glued onto an inflecting stem,
-- e.g. "vala" + "mi". The case forms come from mkCaseNoun applied to the
-- stem alone; the prefix is then attached with +.
-- The first table argument is ignored, and objdef is set to Def.
defNPPrefix : (p,n : Str) -> Number -> NounPhrase = \prefix,stem,num -> emptyNP ** {
  s = \\_,c => prefix + mkCaseNoun stem ! num ! c ;
  n = num ;
  objdef = Def ;
} ;
linCN : CNoun -> Str = \cn -> cn.s ! SgNom ++ cn.compl ! Sg ! Nom ++ cn.postmod ; linCN : CNoun -> Str = \cn -> cn.s ! SgNom ++ cn.compl ! Sg ! Nom ++ cn.postmod ;
linNP' : Possessor -> Case -> NounPhrase -> Str = \p,c,np -> np.s ! p ! c ++ np.postmod ; linNP' : Possessor -> Case -> NounPhrase -> Str = \p,c,np -> np.s ! p ! c ++ np.postmod ;
linNP : NounPhrase -> Str = linNP' NoPoss Nom ; linNP : NounPhrase -> Str = linNP' NoPoss Nom ;

View File

@@ -87,7 +87,7 @@ lin nobody_NP = mkVerb; ""
lin nothing_NP = defNP "" N.NumSg ; lin nothing_NP = defNP "" N.NumSg ;
lin somebody_NP = defNP "" N.NumSg ; lin somebody_NP = defNP "" N.NumSg ;
-} -}
lin something_NP = defNP "valami" Sg ; lin something_NP = defNPPrefix "vala" "mi" Sg ; -- vowel harmony according to mi
------- -------
-- Prep -- Prep