Merge pull request #346 from inariksit/hungarian

Hungarian: vowel harmony + handle multiwords
This commit is contained in:
Inari Listenmaa
2020-06-28 14:14:43 +02:00
committed by GitHub
4 changed files with 72 additions and 26 deletions

View File

@@ -407,6 +407,8 @@ oper
-- Pattern macro matching one front rounded vowel: ö, ő, ü or ű.
front_rounded : pattern Str = #("ö" | "ő" | "ü" | "ű") ;
-- Pattern macro matching the vowel i, with or without the acute accent (i, í).
i : pattern Str = #("i"|"í") ;
-- front and back rounded
-- rounded : pattern Str = #("ö" | "ő" | "ü" | "ű" | "o" | "ó" | "u" | "ú")
@@ -475,12 +477,11 @@ oper
-- Function to get a harmony from any string
-- The result is H_a, H_o or H_e, chosen by pattern matching on the last word of the string.
-- NOTE(review): this span is a diff hunk rendered without +/- markers, so two
-- variants of the `lastWord` binding and of the H_a / H_o patterns appear
-- together. Presumably the `splitMultiword` binding and the `(#c|#i)*` / `(#c)*`
-- patterns are the added lines -- confirm against the repository.
getHarm : Str -> Harm = \s ->
let lastWord : Str = case s of {
x + " " + y => y ;
_ => s } ;
let lastWord : Str = (splitMultiword s).p2 ; -- only include last word
in case lastWord of {
_ + #back + _ => H_a ;
_ + #front_rounded + (#c|"") + (#c|"") => H_o ;
_ + #back + (#c|#i)* => H_a ; -- papír, gumi, zokni: back harmony
-- NB. wrong harmony for farmer.
_ + #front_rounded + (#c)* => H_o ; -- matches nő, not rövid.
-- Anything that matched none of the patterns above gets H_e.
_ => H_e
} ;
@@ -490,11 +491,8 @@ oper
-- Gets a harmony from a plural nominative form by inspecting `dp 2` of it
-- and delegating to getHarm on either that suffix or the whole form.
-- NOTE(review): this span is a diff hunk rendered without +/- markers, so two
-- sets of case branches appear together. Presumably the `"ik"` branch and the
-- final `_ => getHarm ak` are the added lines -- confirm against the repository.
harmFromPlNom : Str -> Harm = \férfiak ->
let ak : Str = dp 2 férfiak ;
in case ak of {
("ak"|"ek"|"ok") => getHarm ak ;
-- For any other suffix, the last two letters aren't reliable.
-- e.g. gumi-gumik has back harmony.
_ => getHarm férfiak
"ik" => getHarm férfiak ;
_ => getHarm ak
} ;
-- Even more reliable harmony indicator: singular allative
@@ -632,4 +630,24 @@ oper
_ => endCase c
} ;
-- A string split in two parts: p1 is the leading material, p2 the final piece.
Multiword : Type = {p1,p2 : Str} ;

-- Splits a space-separated phrase into the words before the last space (p1)
-- and the word after it (p2). Patterns for four, three, two and one space are
-- tried in that order; a string without spaces gets an empty p1.
-- Every result is passed through splitDash, which may split p2 again at a hyphen.
splitMultiword : Str -> Multiword = \phrase ->
  case phrase of {
    a + " " + b + " " + c + " " + d + " " + last
      => splitDash <a ++ b ++ c ++ d + " ", last> ;
    a + " " + b + " " + c + " " + last
      => splitDash <a ++ b ++ c + " ", last> ;
    a + " " + b + " " + last
      => splitDash <a ++ b + " ", last> ;
    a + " " + last
      => splitDash <a + " ", last> ;
    _ => splitDash <"", phrase>
  } ;
-- If the second component contains a hyphen, moves the part up to and
-- including the hyphen onto the end of the first component and keeps the
-- remainder as the second component. Otherwise returns the pair unchanged.
splitDash : Multiword -> Multiword = \pair ->
  case pair of {
    <before, stem + "-" + rest> => <before ++ stem + "-", rest> ;
    _ => pair
  } ;
}

View File

@@ -122,34 +122,56 @@ oper
-- Makes the harmony value ResHun.H_o available under the name harmO.
harmO = ResHun.H_o ;
-- Noun constructors, overloaded on the number and type of their arguments.
-- NOTE(review): this span is a diff hunk rendered without +/- markers, so most
-- overloads appear twice. Presumably the variants that call splitMultiword,
-- multiwordN or compoundN are the added lines and the plain `lin N (...)`
-- variants are the removed ones -- confirm against the repository.
mkN = overload {
mkN : Str -> N =
\s -> lin N (regNoun s) ;
-- One form: split with splitMultiword; the last word goes through regNoun and
-- the preceding material is passed to multiwordN as the prefix.
mkN : Str -> N = \s ->
let mw : Multiword = splitMultiword s ;
in case mw of {
<pr,n> => multiwordN pr (regNoun n)
} ;
mkN : Str -> Str -> N =
\n,a-> lin N (regNounNomAcc n a) ;
-- Two forms: the prefix is taken from the first argument only (mwn.p1);
-- of the second argument only the last word (p2) is used.
mkN : Str -> Str -> N = \n,a ->
let mwn : Multiword = splitMultiword n ;
mwa : Multiword = splitMultiword a ;
in multiwordN mwn.p1 (regNounNomAcc mwn.p2 mwa.p2) ;
mkN : Str -> Str -> Str -> N =
\n,a,pln-> lin N (regNounNomAccPl n a pln) ;
-- Three forms: same scheme, prefix from the first argument only.
mkN : Str -> Str -> Str -> N = \n,a,pln ->
let mwn : Multiword = splitMultiword n ;
mwa : Multiword = splitMultiword a ;
mwpln : Multiword = splitMultiword pln ;
in multiwordN mwn.p1 (regNounNomAccPl mwn.p2 mwa.p2 mwpln.p2) ;
mkN : (x1,_,_,x4 : Str) -> N =
\n,a,pln,possd -> lin N (regNoun4 n a pln possd) ;
-- Four forms: same scheme, prefix from the first argument only.
mkN : (x1,_,_,x4 : Str) -> N = \n,a,pln,possd ->
let mwn : Multiword = splitMultiword n ;
mwa : Multiword = splitMultiword a ;
mwpln : Multiword = splitMultiword pln ;
mwpossd : Multiword = splitMultiword possd ;
in multiwordN mwn.p1 (regNoun4 mwn.p2 mwa.p2 mwpln.p2 mwpossd.p2) ;
mkN : (unoka : Str) -> (testvér : N) -> N = -- Compound noun: e.g. `mkN "unoka" (mkN "testvér")`.
\prefix,n -> n ** {s = \\x => prefix + n.s ! x} ;
mkN : (unoka : Str) -> (testvér : N) -> N = compoundN ;
---------------------------------------------------------------------
-- Not in the visible API. TODO remove, improve or document better --
---------------------------------------------------------------------
-- Worst case with 9 strings.
-- The harmony passed to worstCaseNoun is computed by harmFromSgAll from the fourth argument.
mkN : (x1,_,_,_,_,_,_,_,x9 : Str) -> N =
\nomsg,accsg,supsg,allsg,nompl,f,g,h,i ->
lin N (worstCaseNoun nomsg accsg supsg allsg nompl
f g h i (harmFromSgAll allsg)) ;
-- mkN : (férfi : Str) -> (harm : Harmony) -> (ak : Str) -> N ; -- Noun with unpredictable vowel harmony and plural allomorph
-- Explicit harmony; the plural argument for mkNounHarm is derived with pluralAllomorph.
mkN : Str -> Harmony -> N =
\s,h -> lin N (mkNounHarm h (pluralAllomorph s) s) ;
-- Noun with unpredictable vowel harmony and plural allomorph
mkN : Str -> (plural : Str) -> Harmony -> N =
\s,pl,h -> lin N (mkNounHarm h pl s) ;
\s,pl,h -> lin N (mkNounHarm h pl s)
} ;
-- Attaches a prefix to every form of a noun. A prefix ending in a hyphen is
-- handed to compoundN; any other prefix is placed in front of the noun with
-- token concatenation (++).
multiwordN : Str -> Noun -> N =
  \pre,noun ->
    case pre of {
      _ + "-" => compoundN pre noun ;
      _       => lin N (noun ** {s = \\nform => pre ++ noun.s ! nform})
    } ;
-- Compound noun: e.g. `mkN "unoka" (mkN "testvér")`.
-- The prefix is glued (+) directly onto every form of the noun.
compoundN : (unoka : Str) -> (testvér : Noun) -> N =
  \pre,noun -> lin N (noun ** {s = \\nform => pre + noun.s ! nform}) ;
mkN2 = overload {
mkN2 : Str -> N2 = \s -> lin N2 (regNoun s) ;

View File

@@ -97,6 +97,12 @@ oper
objdef = Def ;
} ;
-- Builds a noun phrase on top of emptyNP from a prefix and a second string:
-- the second string is inflected with mkCaseNoun for the given number and
-- case, and the prefix is glued (+) in front of the result. The phrase is
-- marked with objdef = Def.
defNPPrefix : (p,n : Str) -> Number -> NounPhrase =
  \prefix,stem,num -> emptyNP ** {
    s = \\poss,cas => prefix + mkCaseNoun stem ! num ! cas ;
    n = num ;
    objdef = Def ;
  } ;
-- Linearises a common noun: its s field at SgNom, then its complement at
-- Sg and Nom, then its postmodifier.
linCN : CNoun -> Str =
  \noun -> noun.s ! SgNom ++ noun.compl ! Sg ! Nom ++ noun.postmod ;

-- Linearises a noun phrase for the given possessor and case, followed by its
-- postmodifier.
linNP' : Possessor -> Case -> NounPhrase -> Str =
  \poss,cas,phrase -> phrase.s ! poss ! cas ++ phrase.postmod ;

-- Linearises a noun phrase with NoPoss and Nom.
linNP : NounPhrase -> Str = \phrase -> linNP' NoPoss Nom phrase ;

View File

@@ -87,7 +87,7 @@ lin nobody_NP = mkVerb; ""
lin nothing_NP = defNP "" N.NumSg ;
lin somebody_NP = defNP "" N.NumSg ;
-}
-- NOTE(review): the two lines below look like the before/after pair of this
-- diff hunk (its +/- markers are not visible); presumably only the
-- defNPPrefix variant is meant to remain -- confirm against the repository.
lin something_NP = defNP "valami" Sg ;
lin something_NP = defNPPrefix "vala" "mi" Sg ; -- vowel harmony according to mi
-------
-- Prep