Progress on writing multi-argument paradigm

This commit is contained in:
Julia Jansson
2020-04-19 17:01:57 +02:00
parent dfd370830f
commit c47a6cd279
2 changed files with 387 additions and 97 deletions
+26 -97
View File
@@ -7,8 +7,8 @@ oper
-- http://www.cse.chalmers.se/~aarne/articles/smart-preprint.pdf
-- Words like "alma, kefe, apa, anya, fa":
dAlma : Str -> Noun = \alma ->
let almá : Str = lengthen alma ;
dAlma : (nom : Str) -> (acc : Str) -> Noun = \alma,almát ->
let almá : Str = init almát ;
-- Apply mkNoun to the lengthened stem "almá" or "kefé"
nAlmá : Noun = mkNoun almá ;
@@ -24,17 +24,8 @@ oper
-- Handles words like "madár, nyár, név, bogár" with shortened stem vowel in plural
-- No special <Sg,Sup> case here
-- dMadár: "víz" has vowel shortening, but its plural is "vizek", not "vizik"; implement differently?
dMadár : Str -> Noun = \madár ->
let r = last madár ;
madá = init madár ;
mada = shorten madá ; -- shortens vowels
a = last mada ;
a = case a of {
"e"|"i" => "e" ;
a => a
} ;
madara = mada + r + a ;
dMadár : (nom : Str) -> (acc : Str) -> Noun = \madár,madarat ->
let madara = init madarat ;
nMadara = mkNounHarm (getHarm madara) "k" madara ;
nMadár = mkNoun madár ;
in {s = \\n,c => case <n,c> of {
@@ -49,24 +40,19 @@ oper
} ;
--Handles words like "ló, lé, kő" which are "lovak, levek, kövek" in plural.
-- <Sg,Sup> "lovon" instead of "lón" fixed but that gives the following problems:
dLó : Str -> Noun = \ló ->
let lo = shorten ló ;
lov = lo + "v" ;
ak : Str = case ló of {
_ + ("ö" | "ő") => "ek" ;
_ + ("o" | "ó") => "ak" ;
lé => shorten (last lé) + "k"} ;
harmonyPlural : Harm = case ló of {
_ + ("ö" | "ő") => H_e ; -- All plural allomorphs have E harmony, singular ones have O.
_ + ("o" | "ó") => H_a ;
lé => getHarm (lé)} ;
nLov = mkNounHarm harmonyPlural ak lov ;
--Also handles "tó, hó" which are "tavak, havak" in plural!
-- <Sg,Sup> "lovon" instead of "lón" fixed
dLó : (nom : Str) -> (acc : Str) -> Noun = \ló, lovat ->
let lova = init lovat ;
lov = init lova ;
nLov = mkNoun lov ;
nLova = mkNoun lova ;
nLó = mkNoun ló ;
in {s = \\n,c => case <n,c> of {
-- All plural forms and Sg Acc, Sg Sup use the "lov" stem
<Pl,_>| <Sg,Acc> | <Sg,Sup> => nLov.s ! n ! c ;
-- All plural forms and Sg Acc use the "lova" stem; Sg Sup uses the shorter "lov" stem
<Pl,_>| <Sg,Acc> => nLova.s ! n ! c ;
<Sg,Sup> => nLov.s ! n ! c ;
-- The rest of the forms are formed with the regular constructor,
-- using "ló" as the stem.
@@ -75,51 +61,21 @@ oper
} ;
} ;
--Paradigm for nouns like "tó, hó", which are "tavak, havak" in plural.
--(Only these examples are known so far, so this is a simplified case:
-- the last letter of the word is dropped and "av" is appended.)
--<Sg,Sup> is taken from the "tav" stem as well, giving "tavon" instead of "tón".
dTó : Str -> Noun = \tó ->
  let vStem : Str = (init tó) + "av" ;   -- "tó" -> "t" -> "tav"
      nIrregular = mkNounHarm H_a "ak" vStem ;
      nRegular = mkNoun tó ;
  in {s = \\num,cas => case <num,cas> of {
        -- Sg Acc, Sg Sup and every plural form come from the "tav" stem
        <Sg,Acc> | <Sg,Sup> | <Pl,_> => nIrregular.s ! num ! cas ;
        -- Every other form comes from the regular constructor applied to "tó"
        _ => nRegular.s ! num ! cas
      } ;
  } ;
--Handles words like "gyomor, majom, retek" which are "gyomrot, majmot, retket" in accusative (vowel-dropping base)
--More examples: "ajak, bokor, cukor, csokor, eper, fészek, fodor, gödör, haszon, iker, izom, kölyök, köröm, méreg, piszok, sarok, selyem, szeder, szobor, takony, terem, titok, torok, torony, tükör, vödör" ->
-- "ajkat, bokrot, cukrot, csokrot, epret, fészket, fodrot, gödröt, hasznot, ikret, izmot, kölyköt, körmet, mérget, piszkot, sarkot, selymet, szedret, szobrot, taknyot, termet, titkot, torkot, tornyot, tükröt, vödröt"
--Also handles words like "sátor, álom, alkalom, farok, halom, vászon"
-- "sátrat, álmat, alkalmat, farkat, halmat, vásznat"
--<Sg,Sup> case handled
dMajom : Str -> Noun = \majom ->
-- Str*Str is syntactic sugar for {p1 : Str ; p2 : Str} ;
-- confusing syntax: you can't write let <tako,ny> : Str*Str = …
-- it has to be called something else, and then you
-- can get "tako" and "ny" with p1, p2.
let tako_ny : Str*Str = case majom of {
x + dzs@#trigraph => <x,dzs> ;
x + zs@#digraph => <x,zs> ;
-- ? pattern matches exactly 1 character
x + s@? => <x,s> } ;
tako = tako_ny.p1 ;
ny = tako_ny.p2 ;
nyo = ny + last tako ;
tak = init tako ;
taknyo = tak + nyo ;
nMajmo = mkNounHarm (getHarm taknyo) "k" taknyo ;
dMajom : (nom : Str) -> (acc : Str) -> Noun = \majom, majmot ->
let majmo = init majmot ;
majm = init majmo ;
nMajmo = mkNoun majmo ;
nMajom = mkNoun majom ;
in {s = \\n,c => case <n,c> of {
-- All plural forms and Sg Acc and Sg Sup use the "majmo" stem
<Pl,_> | <Sg,Acc> | <Sg, Sup> => nMajmo.s ! n ! c ;
<Pl,_> | <Sg,Acc> => nMajmo.s ! n ! c ;
<Sg,Sup> => nMajm.s ! n ! c ;
-- The rest of the forms are formed with the regular constructor,
-- using "majom" as the stem.
@@ -127,25 +83,6 @@ oper
} ;
} ;
--Paradigm for vowel-dropping nouns such as
-- "sátor, álom, alkalom, farok, halom, vászon", whose accusatives are
-- "sátrat, álmat, alkalmat, farkat, halmat, vásznat"
-- (bátor not noun)
dFarok : Str -> Noun = \farok ->
  let final = last farok ;
      -- Drop the last two characters, then put the final one back:
      -- "farok" -> "far" -> "fark"
      shortStem : Str = (init (init farok)) + final ;
      nShort = mkNounHarm (getHarm shortStem) "ak" shortStem ;
      nFull = mkNoun farok ;
  in {s = \\num,cas => case <num,cas> of {
        -- Sg Acc, Sg Sup and every plural form are built on the "fark" stem
        <Sg,Acc> | <Sg,Sup> | <Pl,_> => nShort.s ! num ! cas ;
        -- Every other form comes from the regular constructor applied to "farok"
        _ => nFull.s ! num ! cas
      } ;
  } ;
-- More words not covered by current paradigms:
-- https://cl.lingfil.uu.se/~bea/publ/megyesi-hungarian.pdf
-- TODO: falu ~ falva-k (v-case)
@@ -154,20 +91,12 @@ oper
-- regNoun is a /smart paradigm/: it takes one or a couple of forms,
-- and decides which (non-smart) paradigm is the most likely to match.
-- It inspects the ending of the singular nominative and dispatches to one of
-- the d* paradigms, falling back to the regular mkNoun.
--
-- NB: alternatives must be parenthesised. In GF patterns "|" does not bind
-- tighter than "+", so `_ + "a"|"e"` is read as `(_ + "a") | "e"`, whose
-- second alternative matches only the one-letter word "e".
regNoun : Str -> Noun = \sgnom -> case sgnom of {
  -- Words ending in "a" or "e" ("alma", "kefe")
  _ + ("a"|"e") => dAlma sgnom ;
  -- One character (or digraph/trigraph) + "á"/"é" + one character
  -- (or digraph/trigraph): kept on the regular paradigm
  (? | #digraph | #trigraph) + ("á"|"é") + (? | #digraph | #trigraph) => mkNoun sgnom ;
  -- Any other word with "á"/"é" directly before its last character
  _ + ("á"|"é") + ? => dMadár sgnom ;
  -- Words ending in "é", "ő" or "ű"
  _ + ("é"|"ő"|"ű") => dLó sgnom ;
  _ + "ó" => dTó sgnom ;
  _ + "alom" => dFarok sgnom ;
  _ + "elem" => dMajom sgnom ;
-- regNounNomAcc : (nom : Str) -> (acc : Str) -> Noun = n, a -> mkNoun n ;
-- TODO: more non-smart paradigms + more pattern matching
-- TODO: smart paradigms with >1 form. Which forms are the most descriptive?
-- regNounNomAcc n a | (last n) == shorten (last (init a)) = dAlma n a
-- guess : (nom : Str) -> (acc : Str) = \sgnom -> case sgnom of
  _ => mkNoun sgnom -- Fall back to the regular paradigm
} ;
--TODO: Special cases (enter these words manually to not complicate the paradigms):
--dTó: szó special case which fulfills the plural cases but not the <Sg,Acc> or <Sg,Sup> case ("szót" not "szavat")