From 2adce20f7629f245db3a7913cd857668232e7f6d Mon Sep 17 00:00:00 2001 From: lauma Date: Sat, 25 Oct 2025 00:56:24 +0300 Subject: [PATCH] Work on the new noun paradigms. --- src/latvian/PortedMorphoParadigmsLav.gf | 380 +++++++++++++++++++++- src/latvian/PortedMorphoStemchangesLav.gf | 118 ++++--- src/latvian/PortedMorphoUtilsLav.gf | 53 +++ 3 files changed, 507 insertions(+), 44 deletions(-) create mode 100644 src/latvian/PortedMorphoUtilsLav.gf diff --git a/src/latvian/PortedMorphoParadigmsLav.gf b/src/latvian/PortedMorphoParadigmsLav.gf index c09593b5..d630c8e5 100644 --- a/src/latvian/PortedMorphoParadigmsLav.gf +++ b/src/latvian/PortedMorphoParadigmsLav.gf @@ -1,6 +1,382 @@ --# -path=.:abstract:common:prelude --- Here goes authomatically ported paradigms from +-- Contents of this file are automatically ported paradigms from -- https://github.com/PeterisP/morphology/blob/master/src/main/resources/Lexicon_v2.xml +-- NB: Do NOT edit this without consulting lauma@ailab.lv or normundsg@ailab.lv +-- Otherwise your changes might get accidentally revoked! 
+ +resource PortedMorphoParadigmsLav = open PortedMorphoStemchangesLav, ResLav in { {- Noun paradigms: every oper below takes a stem and returns a Noun record holding the inflection table s (number, then case) and the gender gend. -} + +flags coding = utf8 ; + +oper + + noun_1a : Str -> Noun = \stem -> {- Masc; Sg Nom ends in -s; all endings attach to the unchanged stem; Sg Voc has two variants (-s or the bare stem). -} + { + s = table { + Pl => table { + Acc => stem + "us" ; + Dat => stem + "iem" ; + Loc => stem + "os" ; + Nom => stem + "i" ; + Voc => stem + "i" ; + Gen => stem + "u" + } ; + Sg => table { + Acc => stem + "u" ; + Dat => stem + "am" ; + Loc => stem + "ā" ; + Nom => stem + "s" ; + Voc => variants { stem + "s" ; stem + "" } ; + Gen => stem + "a" + } + } ; + gend = Masc + } ; + + noun_1b : Str -> Noun = \stem -> {- Masc; same endings as noun_1a except that Sg Nom and the long Sg Voc variant end in -š. -} + { + s = table { + Pl => table { + Acc => stem + "us" ; + Dat => stem + "iem" ; + Loc => stem + "os" ; + Nom => stem + "i" ; + Voc => stem + "i" ; + Gen => stem + "u" + } ; + Sg => table { + Acc => stem + "u" ; + Dat => stem + "am" ; + Loc => stem + "ā" ; + Nom => stem + "š" ; + Voc => variants { stem + "š" ; stem + "" } ; + Gen => stem + "a" + } + } ; + gend = Masc + } ; + + noun_2a : Str -> Noun = \stem -> {- Masc; Sg Nom ends in -is; Sg Gen and all Pl forms are built on stemchangeSimple 1 of the stem. -} + { + s = table { + Pl => table { + Acc => stemchangeSimple 1 stem + "us" ; + Dat => stemchangeSimple 1 stem + "iem" ; + Loc => stemchangeSimple 1 stem + "os" ; + Nom => stemchangeSimple 1 stem + "i" ; + Voc => stemchangeSimple 1 stem + "i" ; + Gen => stemchangeSimple 1 stem + "u" + } ; + Sg => table { + Acc => stem + "i" ; + Dat => stem + "im" ; + Loc => stem + "ī" ; + Nom => stem + "is" ; + Voc => stem + "i" ; + Gen => stemchangeSimple 1 stem + "a" + } + } ; + gend = Masc + } ; + + noun_2c : Str -> Noun = \stem -> {- Masc; Sg Nom and Sg Gen both end in -s on the unchanged stem; all Pl forms are built on stemchangeSimple 1 of the stem; Sg Voc has two variants. -} + { + s = table { + Pl => table { + Acc => stemchangeSimple 1 stem + "us" ; + Dat => stemchangeSimple 1 stem + "iem" ; + Loc => stemchangeSimple 1 stem + "os" ; + Nom => stemchangeSimple 1 stem + "i" ; + Voc => stemchangeSimple 1 stem + "i" ; + Gen => stemchangeSimple 1 stem + "u" + } ; + Sg => table { + Acc => stem + "i" ; + Dat => stem + "im" ; + Loc => stem + "ī" ; + Nom => stem + "s" ; + Voc => variants { stem + "" ; stem + "s" } ; + Gen => stem + "s" + } + } ; + gend = Masc + } ; + + 
noun_2d : Str -> Noun = \stem -> {- Masc; Sg Nom ends in -s; Sg Gen and all Pl forms are built on stemchangeSimple 1 of the stem. -} + { + s = table { + Pl => table { + Acc => stemchangeSimple 1 stem + "us" ; + Dat => stemchangeSimple 1 stem + "iem" ; + Loc => stemchangeSimple 1 stem + "os" ; + Nom => stemchangeSimple 1 stem + "i" ; + Voc => stemchangeSimple 1 stem + "i" ; + Gen => stemchangeSimple 1 stem + "u" + } ; + Sg => table { + Acc => stem + "i" ; + Dat => stem + "im" ; + Loc => stem + "ī" ; + Nom => stem + "s" ; + Voc => stem + "i" ; + Gen => stemchangeSimple 1 stem + "a" + } + } ; + gend = Masc + } ; + + noun_3m : Str -> Noun = \stem -> {- Masc; Sg Nom ends in -us; all endings attach to the unchanged stem; Sg Voc has two variants. -} + { + s = table { + Pl => table { + Acc => stem + "us" ; + Dat => stem + "iem" ; + Loc => stem + "os" ; + Nom => stem + "i" ; + Voc => stem + "i" ; + Gen => stem + "u" + } ; + Sg => table { + Acc => stem + "u" ; + Dat => stem + "um" ; + Loc => stem + "ū" ; + Nom => stem + "us" ; + Voc => variants { stem + "us" ; stem + "u" } ; + Gen => stem + "us" + } + } ; + gend = Masc + } ; + + noun_4f : Str -> Noun = \stem -> {- Fem; Sg Nom ends in -a; Sg Voc is either that form or the result of stemchangeSimple 17. -} + { + s = table { + Pl => table { + Acc => stem + "as" ; + Dat => stem + "ām" ; + Loc => stem + "ās" ; + Nom => stem + "as" ; + Voc => stem + "as" ; + Gen => stem + "u" + } ; + Sg => table { + Acc => stem + "u" ; + Dat => stem + "ai" ; + Loc => stem + "ā" ; + Nom => stem + "a" ; + Voc => variants { stem + "a" ; stemchangeSimple 17 stem + "" } ; + Gen => stem + "as" + } + } ; + gend = Fem + } ; + + noun_4m : Str -> Noun = \stem -> {- Masc; same endings as noun_4f except Sg Dat -am and a single Sg Voc in -a. -} + { + s = table { + Pl => table { + Acc => stem + "as" ; + Dat => stem + "ām" ; + Loc => stem + "ās" ; + Nom => stem + "as" ; + Voc => stem + "as" ; + Gen => stem + "u" + } ; + Sg => table { + Acc => stem + "u" ; + Dat => stem + "am" ; + Loc => stem + "ā" ; + Nom => stem + "a" ; + Voc => stem + "a" ; + Gen => stem + "as" + } + } ; + gend = Masc + } ; + + noun_5fa : Str -> Noun = \stem -> {- Fem; Sg Nom ends in -e; Pl Gen is built on stemchangeSimple 1 of the stem; Sg Voc is either the -e form or the result of stemchangeSimple 17. -} + { + s = table { + Pl => table { + Acc => stem + "es" ; + Dat => stem + "ēm" ; + Loc => stem + "ēs" ; + Nom => stem + "es" ; + Voc => stem + "es" ; + Gen => stemchangeSimple 1 stem + "u" + } ; + Sg => 
table { + Acc => stem + "i" ; + Dat => stem + "ei" ; + Loc => stem + "ē" ; + Nom => stem + "e" ; + Voc => variants { stem + "e" ; stemchangeSimple 17 stem + "" } ; + Gen => stem + "es" + } + } ; + gend = Fem + } ; + + noun_5ma : Str -> Noun = \stem -> {- Masc; same endings as noun_5fa except Sg Dat -em and a single Sg Voc in -e. -} + { + s = table { + Pl => table { + Acc => stem + "es" ; + Dat => stem + "ēm" ; + Loc => stem + "ēs" ; + Nom => stem + "es" ; + Voc => stem + "es" ; + Gen => stemchangeSimple 1 stem + "u" + } ; + Sg => table { + Acc => stem + "i" ; + Dat => stem + "em" ; + Loc => stem + "ē" ; + Nom => stem + "e" ; + Voc => stem + "e" ; + Gen => stem + "es" + } + } ; + gend = Masc + } ; + + noun_6a : Str -> Noun = \stem -> {- Fem; Sg Nom, Voc and Gen all end in -s; Pl Gen is built on stemchangeSimple 1 of the stem. -} + { + s = table { + Pl => table { + Acc => stem + "is" ; + Dat => stem + "īm" ; + Loc => stem + "īs" ; + Nom => stem + "is" ; + Voc => stem + "is" ; + Gen => stemchangeSimple 1 stem + "u" + } ; + Sg => table { + Acc => stem + "i" ; + Dat => stem + "ij" ; + Loc => stem + "ī" ; + Nom => stem + "s" ; + Voc => stem + "s" ; + Gen => stem + "s" + } + } ; + gend = Fem + } ; + + noun_3f : Str -> Noun = \stem -> {- Fem; Sg Nom ends in -us; all endings attach to the unchanged stem; Sg Voc has two variants. -} + { + s = table { + Pl => table { + Acc => stem + "us" ; + Dat => stem + "ūm" ; + Loc => stem + "ūs" ; + Nom => stem + "us" ; + Voc => stem + "us" ; + Gen => stem + "u" + } ; + Sg => table { + Acc => stem + "u" ; + Dat => stem + "ui" ; + Loc => stem + "ū" ; + Nom => stem + "us" ; + Voc => variants { stem + "us" ; stem + "u" } ; + Gen => stem + "us" + } + } ; + gend = Fem + } ; + + noun_6b : Str -> Noun = \stem -> {- Fem; same as noun_6a except that Pl Gen attaches -u to the unchanged stem. -} + { + s = table { + Pl => table { + Acc => stem + "is" ; + Dat => stem + "īm" ; + Loc => stem + "īs" ; + Nom => stem + "is" ; + Voc => stem + "is" ; + Gen => stem + "u" + } ; + Sg => table { + Acc => stem + "i" ; + Dat => stem + "ij" ; + Loc => stem + "ī" ; + Nom => stem + "s" ; + Voc => stem + "s" ; + Gen => stem + "s" + } + } ; + gend = Fem + } ; + + noun_5fb : Str -> Noun = \stem -> {- Fem; same as noun_5fa except that Pl Gen attaches -u to the unchanged stem. -} + { + s = table { + Pl => table { + Acc => stem + "es" ; + Dat => stem + "ēm" ; + Loc => stem + "ēs" ; + Nom => stem + "es" ; 
+ Voc => stem + "es" ; + Gen => stem + "u" + } ; + Sg => table { + Acc => stem + "i" ; + Dat => stem + "ei" ; + Loc => stem + "ē" ; + Nom => stem + "e" ; + Voc => variants { stem + "e" ; stemchangeSimple 17 stem + "" } ; + Gen => stem + "es" + } + } ; + gend = Fem + } ; + + noun_5mb : Str -> Noun = \stem -> {- Masc; same as noun_5ma except that Pl Gen attaches -u to the unchanged stem. -} + { + s = table { + Pl => table { + Acc => stem + "es" ; + Dat => stem + "ēm" ; + Loc => stem + "ēs" ; + Nom => stem + "es" ; + Voc => stem + "es" ; + Gen => stem + "u" + } ; + Sg => table { + Acc => stem + "i" ; + Dat => stem + "em" ; + Loc => stem + "ē" ; + Nom => stem + "e" ; + Voc => stem + "e" ; + Gen => stem + "es" + } + } ; + gend = Masc + } ; + + noun_2b : Str -> Noun = \stem -> {- Masc; Sg Nom ends in -is; Pl forms attach to the unchanged stem; Sg Gen has two variants, without and with stemchangeSimple 1. -} + { + s = table { + Pl => table { + Acc => stem + "us" ; + Dat => stem + "iem" ; + Loc => stem + "os" ; + Nom => stem + "i" ; + Voc => stem + "i" ; + Gen => stem + "u" + } ; + Sg => table { + Acc => stem + "i" ; + Dat => stem + "im" ; + Loc => stem + "ī" ; + Nom => stem + "is" ; + Voc => stem + "i" ; + Gen => variants { stem + "a" ; stemchangeSimple 1 stem + "a" } + } + } ; + gend = Masc + } ; +} -resource PortedMorphoParadigmsLav = open Prelude, Predef in {} \ No newline at end of file diff --git a/src/latvian/PortedMorphoStemchangesLav.gf b/src/latvian/PortedMorphoStemchangesLav.gf index f0c4e9ae..bce1c1dc 100644 --- a/src/latvian/PortedMorphoStemchangesLav.gf +++ b/src/latvian/PortedMorphoStemchangesLav.gf @@ -3,47 +3,80 @@ -- Here goes manually ported stemchanges from -- https://github.com/PeterisP/morphology/blob/master/src/main/java/lv/semti/morphology/analyzer/Mijas.java -resource PortedMorphoStemchangesLav = open Prelude, Predef in { +resource PortedMorphoStemchangesLav = open Prelude, Predef, ResLav in { flags coding = utf8 ; oper - stemchange : Int -> Str -> Str = \parId,stem -> - case parId of { - 0 => stem; - 1 => case stem of { -- nouns - s + ("kst") => s + "kš"; - s + ("nst") => s + "nš"; - s + ("ll") => s + "ļļ"; - s + ("sl") => s + "šļ"; - s + ("zl") => s + "žļ"; 
- s + ("ln") => s + "ļņ"; - s + ("nn") => s + "ņņ"; - s + ("sn") => s + "šņ"; - s + ("zn") => s + "žņ"; - s + ("īt") => s + "īš"; - s + ("d") => s + "ž"; - s + ("t") => s + "š"; - s + ("n") => s + "ņ"; - s + ("s") => s + "š"; - s + ("z") => s + "ž"; - s + ("b"|"f"|"m"|"p"|"v") => stem + "j"; - - _ => stem - }; + stemchangeSimple : Int -> Str -> Str = \parId,stem -> {- Applies the stem change numbered parId to stem: 0 returns the stem unchanged, 1 rewrites the stem-final consonants as listed below, 17 is the shortened-vocative rule; any other number calls error. -} + case parId of { + 0 => stem ; - 17 => case of { -- shortened vocative for fem nouns - => stem; - --<_, 0|1> => variants {}; - <_, 0|1> => nonExist; -- Exception - <_, _> => stem - }; + -- For nouns we need changes 0, 1, 17 + 1 => case stem of { -- nouns: longer suffixes are listed first, so they win over the single-consonant rules + s + ("kst") => s + "kš" ; + s + ("nst") => s + "nš" ; + s + ("ll") => s + "ļļ" ; + s + ("sl") => s + "šļ" ; + s + ("zl") => s + "žļ" ; + s + ("ln") => s + "ļņ" ; + s + ("nn") => s + "ņņ" ; + s + ("sn") => s + "šņ" ; + s + ("zn") => s + "žņ" ; + s + ("īt") => s + "īš" ; + s + ("d") => s + "ž" ; + s + ("t") => s + "š" ; + s + ("n") => s + "ņ" ; + s + ("s") => s + "š" ; + s + ("z") => s + "ž" ; + s + ("b"|"f"|"m"|"p"|"v") => stem + "j" ; + + _ => stem + } ; + + 17 => case of { -- shortened vocative for fem nouns; NOTE(review): the second tuple component looks like a syllable count (0 or 1 gives nonExist) - confirm + => stem ; + --<_, 0|1> => variants {} ; + <_, 0|1> => nonExist ; -- Exception + <_, _> => stem + } ; + + -- For full adjectives we need changes 0, 3, 34, 13 - _ => error ("Unsupported stemchange") - }; + _ => error ("Unsupported stemchange") + }; + stemchangeForAdjAdv : Int -> Str -> Definiteness -> Degree -> Str = \parId,stem,defi,deg -> {- Stem change that also depends on definiteness and degree: numbers 3 and 34 are handled here, every other number is delegated to stemchangeSimple; nonExist marks combinations without a form. -} + case parId of { + 3 => case stem of { + s + "āk" => case deg of { + Posit => stem ; + _ => nonExist -- TODO what should go there for nonexisting forms? 
+ }; + _ => case deg of { + Posit => stem ; + Compar => stem + "āk" ; + Superl => case defi of { + Def => "vis" + stem + "āk" ; + Indef => nonExist + } + } + } ; + 34 => case deg of { -- pēdēj-ais -> pēdē-jam, zaļ-š -> zaļa-jam + Posit => case stem of { + s + "ēj" => s + "ē" ; + _ => stem + "a" + } ; + Compar => stem + "āka" ; + Superl => case defi of { + Def => "vis" + stem + "āka" ; + Indef => nonExist + } + } ; + _ => case deg of { _ => stemchangeSimple parId stem } + } ; -- Inari's trick for counting syllables https://inariksit.github.io/gf/2018/08/28/gf-gotchas.html#cute-way-to-count-syllables -- pattern macro for vowels @@ -53,19 +86,20 @@ oper SylCnt : Type = Bool -> Str -> Ints 10 ; countSyllables : Str -> Ints 10 = go count False - where { - go : SylCnt -> SylCnt = \f,wasVowel,word -> + where { -- synonym of let + go : SylCnt -> SylCnt = \f,wasVowel,word -> -- Three arguments because SylCnt -> SylCnt unfolds to SylCnt -> Bool -> Str -> Ints 10: f is the SylCnt applied to the rest of the word, wasVowel and word are the arguments of the resulting SylCnt. case of { - <#v + s, False> => Predef.plus (f True s) 1 ; - <#v + s, True> => f True s ; - => f False s ; - _ => 0 } ; + <#v + s, False> => Predef.plus (f True s) 1 ; + <#v + s, True> => f True s ; + => f False s ; + _ => 0 + } ; - -- end of recursion - scBase : SylCnt = \_,_ -> 0 ; + -- end of recursion + scBase : SylCnt = \_,_ -> 0 ; - -- the function given to countSyllables - count : SylCnt = go (go (go (go (go (go (go (go (go (go (go (go (go (go (go (go (go (go (go (go (go (go (go (go (go (go (go (go (go (go (go (go (go (go (go (go (go (go (go (go scBase))))))))))))))))))))))))))))))))))))))) + -- the function given to countSyllables + count : SylCnt = go (go (go (go (go (go (go (go (go (go (go (go (go (go (go (go (go (go (go (go (go (go (go (go (go (go (go (go (go (go (go (go (go (go (go (go (go (go (go (go scBase))))))))))))))))))))))))))))))))))))))) } ; } diff --git a/src/latvian/PortedMorphoUtilsLav.gf b/src/latvian/PortedMorphoUtilsLav.gf new file mode 100644 index 00000000..c3368782 --- /dev/null 
+++ b/src/latvian/PortedMorphoUtilsLav.gf @@ -0,0 +1,53 @@ +--# -path=.:abstract:common:prelude + +-- Manually written utilities used by the automatically exported Tēzaurs lexicon. + +resource PortedMorphoUtilsLav = open Prelude, Predef, ResLav in { + +flags coding = utf8 ; + +oper + + changeNounForm : Number -> Case -> Str -> Noun -> Noun = {- Returns a copy of noun whose s table yields form in the cells matched by the case expression below (presumably the cell for number N and case C - TODO confirm) and the original form everywhere else. -} + \N,C,form,noun -> noun ** { + s = \\n,c => case of { + | + | + | + | + | + | + | + => form ; + _ => noun.s ! n ! c } + } ; + + addNounForm : Number -> Case -> Str -> Noun -> Noun = {- Like changeNounForm, but in the matched cells form is added as a variant next to the original form instead of replacing it. -} + \N,C,form,noun -> noun ** { + s = \\n,c => case of { + | + | + | + | + | + | + | + => variants {form ; noun.s ! n ! c} ; + _ => noun.s ! n ! c } + } ; + + {- A note about GF restrictions as of 2025-08-27: + The following does not work: the compiler assumes that N and C are free variables. + changeForm_ideal : Number -> Case -> Str -> Noun -> Noun ; + changeForm_ideal N C str noun = noun ** {NF N C => str} ; + Meanwhile, this should work: + changeForm_fixed : Str -> Noun -> Noun ; + changeForm_fixed str noun = noun ** {NF Sg Nom => str} ; + Related: https://github.com/GrammaticalFramework/gf-core/issues/198 + Inari also suggested avoiding a function and writing this inline: + Noun : Type = {s : NForm => Str} ; + myRegularNoun = {- … some noun …- } ; + mySpecialNoun = {s = myRegularNoun.s ** {NF Sg Acc => "special form"}} ; + -} + +}