forked from GitHub/gf-rgl
Progress on writing multi-argument paradigm
This commit is contained in:
@@ -7,8 +7,8 @@ oper
|
||||
-- http://www.cse.chalmers.se/~aarne/articles/smart-preprint.pdf
|
||||
|
||||
-- Words like "alma, kefe, apa, anya, fa":
|
||||
dAlma : Str -> Noun = \alma ->
|
||||
let almá : Str = lengthen alma ;
|
||||
dAlma : (nom : Str) -> (acc : Str) -> Noun = \alma,almát ->
|
||||
let almá : Str = init almát ;
|
||||
|
||||
-- Apply mkNoun to the lengthened stem "almá" or "kefé"
|
||||
nAlmá : Noun = mkNoun almá ;
|
||||
@@ -24,17 +24,8 @@ oper
|
||||
|
||||
-- Handles words like "madár, nyár, név, bogár" with shortened stem vowel in plural
|
||||
-- No special <Sg,Sup> case here
|
||||
-- dMadár: "víz" has vowel shortening, but its plural is "vizek", not "vizik"; implement differently?
|
||||
dMadár : Str -> Noun = \madár ->
|
||||
let r = last madár ;
|
||||
madá = init madár ;
|
||||
mada = shorten madá ; -- shortens vowels
|
||||
a = last mada ;
|
||||
a = case a of {
|
||||
"e"|"i" => "e" ;
|
||||
a => a
|
||||
} ;
|
||||
madara = mada + r + a ;
|
||||
dMadár : (nom : Str) -> (acc : Str) -> Noun = \madár,madarat ->
|
||||
let madara = init madarat ;
|
||||
nMadara = mkNounHarm (getHarm madara) "k" madara ;
|
||||
nMadár = mkNoun madár ;
|
||||
in {s = \\n,c => case <n,c> of {
|
||||
@@ -49,24 +40,19 @@ oper
|
||||
} ;
|
||||
|
||||
--Handles words like "ló, lé, kő" which are "lovak, levek, kövek" in plural.
|
||||
-- <Sg,Sup> "lovon" instead of "lón" fixed but that gives the following problems:
|
||||
dLó : Str -> Noun = \ló ->
|
||||
let lo = shorten ló ;
|
||||
lov = lo + "v" ;
|
||||
ak : Str = case ló of {
|
||||
_ + ("ö" | "ő") => "ek" ;
|
||||
_ + ("o" | "ó") => "ak" ;
|
||||
lé => shorten (last lé) + "k"} ;
|
||||
harmonyPlural : Harm = case ló of {
|
||||
_ + ("ö" | "ő") => H_e ; -- All plural allomorphs have E harmony, singular ones have O.
|
||||
_ + ("o" | "ó") => H_a ;
|
||||
lé => getHarm (lé)} ;
|
||||
nLov = mkNounHarm harmonyPlural ak lov ;
|
||||
--Also handles "tó, hó" which are "tavak, havak" in plural!
|
||||
-- <Sg,Sup> "lovon" instead of "lón" fixed
|
||||
dLó : (nom : Str) -> (acc : Str) -> Noun = \ló, lovat ->
|
||||
let lova = init lovat ;
|
||||
lov = init lova ;
|
||||
nLov = mkNoun lov ;
|
||||
nLova = mkNoun lova ;
|
||||
nLó = mkNoun ló ;
|
||||
in {s = \\n,c => case <n,c> of {
|
||||
|
||||
-- All plural forms and Sg Acc, Sg Sup use the "lov" stem
|
||||
<Pl,_>| <Sg,Acc> | <Sg,Sup> => nLov.s ! n ! c ;
|
||||
-- All plural forms and Sg Acc use the "lova" stem; Sg Sup uses the "lov" stem
|
||||
<Pl,_>| <Sg,Acc> => nLova.s ! n ! c ;
|
||||
<Sg,Sup> => nLov.s ! n ! c ;
|
||||
|
||||
-- The rest of the forms are formed with the regular constructor,
|
||||
-- using "ló" as the stem.
|
||||
@@ -75,51 +61,21 @@ oper
|
||||
} ;
|
||||
} ;
|
||||
|
||||
--Handles words like "tó, hó" which are "tavak, havak" in plural.
|
||||
--(Since I only have these examples for now I do a simplified case with ó, a)
|
||||
--<Sg,Sup> "tavon" instead of "tón" case fixed, works automatically with the Sup rules
|
||||
-- dTó: one-argument paradigm. Builds one noun on a derived "…av" stem and one
-- on the given form, then picks between them per number/case.
dTó : Str -> Noun = \tó ->
|
||||
let t = init tó ; -- presumably the input minus its last character ("t" from "tó") — confirm against init
|
||||
tav = t + "av" ; -- derived stem: the literal "av" is appended, whatever the dropped character was
|
||||
nTav = mkNounHarm H_a "ak" tav ; -- harmony (H_a) and plural ending ("ak") are hard-coded for this stem
|
||||
nTó = mkNoun tó ; -- regular noun built on the unchanged input form
|
||||
in {s = \\n,c => case <n,c> of {
|
||||
|
||||
-- All plural forms, Sg Acc and Sg Sup use the "tav" stem
|
||||
<Pl,_>|<Sg,Acc>|<Sg,Sup> => nTav.s ! n ! c ;
|
||||
|
||||
-- The rest of the forms are formed with the regular constructor,
|
||||
-- using "tó" as the stem.
|
||||
_ => nTó.s ! n ! c
|
||||
|
||||
} ;
|
||||
} ;
|
||||
|
||||
--Handles words like "gyomor, majom, retek" which are "gyomrot, majmot, retket" in accusative (vowel-dropping stem)
|
||||
--More examples: "ajak, bokor, cukor, csokor, eper, fészek, fodor, gödör, haszon, iker, izom, kölyök, köröm, méreg, piszok, sarok, selyem, szeder, szobor, takony, terem, titok, torok, torony, tükör, vödör" ->
|
||||
-- "ajkat, bokrot, cukrot, csokrot, epret, fészket, fodrot, gödröt, hasznot, ikret, izmot, kölyköt, körmet, mérget, piszkot, sarkot, selymet, szedret, szobrot, taknyot, termet, titkot, torkot, tornyot, tükröt, vödröt"
|
||||
--Also handles words like "sátor, álom, alkalom, farok, halom, vászon"
|
||||
-- "sátrat, álmat, alkalmat, farkat, halmat, vásznat"
|
||||
--<Sg,Sup> case handled
|
||||
dMajom : Str -> Noun = \majom ->
|
||||
-- Str*Str is syntactic sugar for {p1 : Str ; p2 : Str} ;
|
||||
-- confusing syntax: you can't write let <tako,ny> : Str*Str = …
|
||||
-- it has to be called something else, and then you
|
||||
-- can get "tako" and "ny" with p1, p2.
|
||||
let tako_ny : Str*Str = case majom of {
|
||||
x + dzs@#trigraph => <x,dzs> ;
|
||||
x + zs@#digraph => <x,zs> ;
|
||||
-- ? pattern matches exactly 1 character
|
||||
x + s@? => <x,s> } ;
|
||||
tako = tako_ny.p1 ;
|
||||
ny = tako_ny.p2 ;
|
||||
|
||||
nyo = ny + last tako ;
|
||||
tak = init tako ;
|
||||
taknyo = tak + nyo ;
|
||||
nMajmo = mkNounHarm (getHarm taknyo) "k" taknyo ;
|
||||
dMajom : (nom : Str) -> (acc : Str) -> Noun = \majom, majmot ->
|
||||
let majmo = init majmot ;
|
||||
majm = init majmo ;
|
||||
nMajmo = mkNoun majmo ;
|
||||
nMajom = mkNoun majom ;
|
||||
in {s = \\n,c => case <n,c> of {
|
||||
-- All plural forms and Sg Acc and Sg Sup use the "majmo" stem
|
||||
<Pl,_> | <Sg,Acc> | <Sg, Sup> => nMajmo.s ! n ! c ;
|
||||
<Pl,_> | <Sg,Acc> => nMajmo.s ! n ! c ;
|
||||
<Sg,Sup> => nMajm.s ! n ! c ;
|
||||
|
||||
-- The rest of the forms are formed with the regular constructor,
|
||||
-- using "majom" as the stem.
|
||||
@@ -127,25 +83,6 @@ oper
|
||||
} ;
|
||||
} ;
|
||||
|
||||
--Handles words like "sátor, álom, alkalom, farok, halom, vászon"
|
||||
-- "sátrat, álmat, alkalmat, farkat, halmat, vásznat"
|
||||
-- (bátor not noun)
|
||||
-- dFarok: one-argument paradigm. Derives a shortened stem by removing the
-- second-to-last character of the input, builds one noun on that stem and one
-- on the input itself, then picks between them per number/case.
dFarok : Str -> Noun = \farok ->
|
||||
let k = last farok ; -- presumably the final single character ("k" in "farok") — a final digraph would be split; confirm
|
||||
far = init (init farok) ; -- input with its last two characters removed ("far")
|
||||
fark = far + k ; -- shortened stem: the second-to-last character is gone ("fark")
|
||||
nFark = mkNounHarm (getHarm fark) "ak" fark ; -- harmony is computed from the shortened stem; plural ending "ak" is hard-coded
|
||||
nFarok = mkNoun farok ; -- regular noun built on the unchanged input form
|
||||
in {s = \\n,c => case <n,c> of {
|
||||
-- All plural forms and Sg Acc and Sg Sup use the "fark" stem
|
||||
<Pl,_> | <Sg,Acc> | <Sg, Sup> => nFark.s ! n ! c ;
|
||||
|
||||
-- The rest of the forms are formed with the regular constructor,
|
||||
-- using "farok" as the stem.
|
||||
_ => nFarok.s ! n ! c
|
||||
} ;
|
||||
} ;
|
||||
|
||||
-- More words not covered by current paradigms:
|
||||
-- https://cl.lingfil.uu.se/~bea/publ/megyesi-hungarian.pdf
|
||||
-- TODO: falu ~ falva-k (v-case)
|
||||
@@ -154,20 +91,12 @@ oper
|
||||
|
||||
-- regNoun is a /smart paradigm/: it takes one or a couple of forms,
|
||||
-- and decides which (non-smart) paradigm is the most likely to match.
|
||||
-- Dispatches on the shape of the singular nominative; branches are tried in
-- order, and anything unmatched falls through to mkNoun.
regNoun : Str -> Noun = \sgnom -> case sgnom of {
|
||||
_ + "a"|"e" => dAlma sgnom ; -- NOTE(review): "+" and "|" look ungrouped here; this may parse as (_ + "a") | "e", i.e. only the exact word "e" — probably meant _ + ("a"|"e"); confirm
|
||||
(? | #digraph | #trigraph) + ("á"|"é") + (? | #digraph | #trigraph) => mkNoun sgnom ; -- short words of the shape X + á/é + X (X = one character, digraph or trigraph) stay regular
|
||||
_ + ("á"|"é") + ? => dMadár sgnom ; -- longer words with á/é before exactly one final character
|
||||
_ + "é"|"ő"|"ű" => dLó sgnom ; -- NOTE(review): same grouping concern as the first branch — probably meant _ + ("é"|"ő"|"ű"); confirm
|
||||
_ + "ó" => dTó sgnom ;
|
||||
_ + "alom" => dFarok sgnom ;
|
||||
_ + "elem" => dMajom sgnom ;
|
||||
-- regNounNomAcc : (nom : Str) -> (acc : Str) -> Noun = n, a -> mkNoun n ;
|
||||
|
||||
-- TODO: more non-smart paradigms + more pattern matching
|
||||
-- TODO: smart paradigms with >1 form. Which forms are the most descriptive?
|
||||
-- regNounNomAcc n a | (last n) == shorten (last (init a)) = dAlma n a
|
||||
|
||||
-- guess : (nom : Str) -> (acc : Str) = \sgnom -> case sgnom of
|
||||
|
||||
_ => mkNoun sgnom -- Fall back to the regular paradigm
|
||||
} ;
|
||||
|
||||
--TODO: Special cases (enter these words manually to not complicate the paradigms):
|
||||
--dTó: "szó" is a special case: it follows the plural pattern but not the <Sg,Acc> or <Sg,Sup> pattern ("szót", not "szavat")
|
||||
|
||||
Reference in New Issue
Block a user