(Hun) WIP: first attempt at stem-based nominal morphology

2026-05-27 17:08:54 -06:00 · 2020-04-22 19:55:56 +02:00
parent 3705c9229d
commit 15040355e6
5 changed files with 88 additions and 65 deletions
--- a/src/hungarian/CatHun.gf
+++ b/src/hungarian/CatHun.gf
@@ -58,7 +58,7 @@ concrete CatHun of Cat = CommonX ** open ResHun, Prelude in {
 -- ``` Predet (QuantSg | QuantPl Num) Ord
 -- as defined in NounHun.
-    CN = ResHun.Noun ;
+    CN = ResHun.CNoun ;
    NP = ResHun.NounPhrase ;
    Pron = ResHun.Pronoun ; --Pronouns need enough info to turn it into NP or Quant.
    Det = ResHun.Determiner ;
@@ -118,4 +118,7 @@ concrete CatHun of Cat = CommonX ** open ResHun, Prelude in {
    N3 = ResHun.Noun ;
    PN = ResHun.NounPhrase ;
 linref
   CN = linCN ;
 }
--- a/src/hungarian/NounHun.gf
+++ b/src/hungarian/NounHun.gf
@@ -11,7 +11,8 @@ concrete NounHun of Noun = CatHun ** open ResHun, Prelude, Coordination in {
    s = \\c => case det.caseagr of {
                  True  => det.s ! c ;
                  False => det.s ! Nom
-               } ++ cn.s ! det.n ! c ;
+               } ++ cn.s ! det.n ! case2stem c
                 ++ cn.rs ! det.n ! c ;
    agr = <P3,det.n> ;
    } ;
@@ -159,7 +160,9 @@ concrete NounHun of Noun = CatHun ** open ResHun, Prelude, Coordination in {
  -- : N -> CN
  -- : N2 -> CN ;
-  UseN,UseN2 = \n -> n ;
+  UseN,UseN2 = \n -> n ** {
    rs = \\_,_ => [] ;
    } ;
  -- : N2 -> NP -> CN ;
  -- ComplN2 n2 np =
@@ -181,7 +184,7 @@ concrete NounHun of Noun = CatHun ** open ResHun, Prelude, Coordination in {
  -- : CN -> RS  -> CN ;
  RelCN cn rs = cn ** {
-    s = \\n,c => cn.s ! n ! c ++ rs.s ! n ! c
+    rs = \\n,c => rs.s ! n ! c
    } ;
  -- : CN -> Adv -> CN ;
--- a/src/hungarian/NounMorphoHun.gf
+++ b/src/hungarian/NounMorphoHun.gf
@@ -1,7 +1,7 @@
 resource NounMorphoHun = ParamHun ** open Prelude, Predef in {
 oper
-  Noun = {s : Number => Case => Str} ;
+  Noun = {s : Number => Case => Str ; h : Harm} ;
  -- Paradigm functions
  -- http://www.cse.chalmers.se/~aarne/articles/smart-preprint.pdf
@@ -12,15 +12,15 @@ oper
        -- Apply mkNoun to the lengthened stem "almá" or "kefé"
        nAlmá : Noun = mkNoun almá ;
-     in {s = \\n,c => case <n,c> of {
+     in nAlmá ** {
          s = \\n,c => case <n,c> of {
                -- Singular nominative uses the given form, e.g. "alma" or "kefe"
                <Sg,Nom> => alma ;
                -- The rest of the forms are formed with the regular constructor,
                -- using "almá" or "kefé" as the stem.
-                _ => nAlmá.s ! n ! c
+                _ => nAlmá.s ! n ! c }
-              } ;
+          } ;
        } ;
  --Handles words like "ló, lé, kő" which are "lovak, levek, kövek" in plural.
  --Also handles "tó, hó" which are "tavak, havak" in plural!
@@ -31,7 +31,8 @@ oper
        nLov = mkNoun lov ;
        nLova = mkNoun lova ;
        nLó = mkNoun ló ;
-    in {s = \\n,c => case <n,c> of {
+    in nLova ** {
         s = \\n,c => case <n,c> of {
                -- All plural forms and Sg Acc, Sg Sup use the "lova" stem
                <Pl,_>| <Sg,Acc> => nLova.s ! n ! c ;
@@ -39,10 +40,8 @@ oper
                -- The rest of the forms are formed with the regular constructor,
                -- using "ló" as the stem.
-                _ => nLó.s ! n ! c
+                _ => nLó.s ! n ! c }
-
+         } ;
              } ;
        } ;
    -- NB. arguments are Sg Nom, Pl Nom
    -- handles words like: falu, daru, tetű -> falvak, darvak, tetvek
@@ -50,17 +49,15 @@ oper
      let falva = init falvak ;
          nFalva = mkNoun falva ;
          nFalu = mkNoun falu ;
-       in {s = \\n,c => case <n,c> of {
+       in nFalu ** {
            s = \\n,c => case <n,c> of {
               -- All plural forms use the "falva" stem
               <Pl,_>   => nFalva.s ! n ! c ;
               -- The rest of the forms are formed with the regular constructor,
               -- using "falu" as the stem.
-               _ => nFalu.s ! n ! c
+               _ => nFalu.s ! n ! c }
-
+            } ;
             } ;
       } ;
  --Handles words like "gyomor, majom, retek" which are "gyomrot, majmot, retket" in accusative (vowel-dropping base)
@@ -74,16 +71,17 @@ oper
        majm = init majmo ;
        nMajmo = mkNoun majmo ;
        nMajom = mkNoun majom ;
-    in {s = \\n,c => case <n,c> of {
+    in nMajmo ** {
-        -- All plural forms and Sg Acc and Sg Sup use the "majmo" stem
+         s = \\n,c => case <n,c> of {
-        <Pl,_> | <Sg,Acc> => nMajmo.s ! n ! c ;
+            -- All plural forms and Sg Acc and Sg Sup use the "majmo" stem
-        <Sg,Sup> => nMajmo.s ! n ! c ;
+            <Pl,_> | <Sg,Acc> => nMajmo.s ! n ! c ;
            <Sg,Sup> => nMajmo.s ! n ! c ;
-        -- The rest of the forms are formed with the regular constructor,
+            -- The rest of the forms are formed with the regular constructor,
-        -- using "majom" as the stem.
+            -- using "majom" as the stem.
-        _ => nMajom.s ! n ! c
+            _ => nMajom.s ! n ! c
-      } ;
+            }
-    } ;
+          } ;
  -- Generic constructor for cases with different stem in Sg Nom and Sg Gen.
  -- Assumes that Sg Gen and all plurals have genitive stem, others Sg Nom stem.
@@ -92,15 +90,16 @@ oper
    let tolla = init tollat ;
        nTolla = mkNoun tolla ;
        nToll = mkNoun toll ;
-     in {s = \\n,c => case <n,c> of {
+     in nTolla ** {
-         -- All plural forms and Sg Acc use the "tolla" stem
+          s = \\n,c => case <n,c> of {
-         <Pl,_> | <Sg,Acc> => nTolla.s ! n ! c ;
+             -- All plural forms and Sg Acc use the "tolla" stem
             <Pl,_> | <Sg,Acc> => nTolla.s ! n ! c ;
-         -- The rest of the forms are formed with the regular constructor,
+             -- The rest of the forms are formed with the regular constructor,
-         -- using "toll" as the stem.
+             -- using "toll" as the stem.
-         _ => nToll.s ! n ! c
+             _ => nToll.s ! n ! c
-       }
+            }
-     } ;
+          } ;
  -- More words not covered by current paradigms:
  -- https://cl.lingfil.uu.se/~bea/publ/megyesi-hungarian.pdf
@@ -160,7 +159,7 @@ regNounNomAcc : (nom : Str) -> (acc : Str) -> Noun = \n,a ->
  -- Here we guess the genitive form and give it to appropriate 2-arg paradigm
  regNoun : Str -> Noun = \sgnom -> case sgnom of {
    _  + ("a"|"e")         => dAlma sgnom (lengthen sgnom + "t") ;
-    ("nyár"|"név")         => dToll sgnom (név2nevet sgnom) ;
+    -- Words like nyár, név need to use 2-arg smart paradigm
    (#c|"")+("á"|"é")+ #c  => mkNoun sgnom ;
    _  + ("á"|"é") + #c    => dToll sgnom (név2nevet sgnom) ;
    _  + ("ó"|"é"|"ő"|"ű") => dLó sgnom (ló2lovat sgnom) ;
@@ -302,21 +301,21 @@ oper
  -- Variant of case forms when the noun stem ends in consonant.
  endCaseCons : Case -> HarmForms = \c -> case c of {
    Nom => harm1 [] ;
    Acc => harm3 "ot" "et" "öt" ;
    Dat => harm "nak" "nek" ;
    Ill => harm "ba" "be" ;
    Ine => harm "ban" "ben" ;
    Ela => harm "ból" "ből" ;
    All => harm3 "hoz" "hez" "höz" ;
    Ade => harm "nál" "nél" ;
    Abl => harm "tól" "től" ;
    Sub => harm "ra" "re" ;
    Sup => harm3 "on" "en" "ön" ;
    Del => harm "ról" "ről" ;
    Cau => harm1 "ért" ;
    Ins => harm "al" "el" ;
-    Tra => harm "á" "é"
+    Tra => harm "á" "é" ;
    Dat => harm "nak" "nek" ;
    _Nom => harm1 []
    -- All => harm3 "hoz" "hez" "höz" ;
    -- Ade => harm "nál" "nél" ;
    -- Abl => harm "tól" "től" ;
    -- Sub => harm "ra" "re" ;
    -- Ill => harm "ba" "be" ;
    -- Ine => harm "ban" "ben" ;
    -- Ela => harm "ból" "ből" ;
    -- Del => harm "ról" "ről" ;
    -- Cau => harm1 "ért" ;
    -- Ess => harm "stul" "stül" ;  -- Essive-modal 'with <the noun> and its parts'
    -- Ter => harm1 "ig" ; -- Terminative 'as far as <the noun>'
    -- For => harm1 "ként" ; -- Formal 'as <the noun>'
@@ -387,7 +386,8 @@ oper
                           False => duplicateLast w } ;
        -- Noun is {s : Number => Case => Str ; h : Harm}; we construct nested tables for s.
-     in {s = table {
+     in {h = h ;
         s = table {
               Sg => table {
                       -- Double the last letter (if consonant) before Ins, Tra
                       c@(Ins|Tra) => duplConsStem + endCaseSg c ! h ;
@@ -401,6 +401,7 @@ oper
                       -- If we add possessive forms with allomorph -i, then revise.
                       c@_         => w + plural +       endCasePl c ! h }
             }
-   } ;
+
        } ;
 }
--- a/src/hungarian/ParamHun.gf
+++ b/src/hungarian/ParamHun.gf
@@ -20,38 +20,46 @@ oper
 param
-  Case = Nom | Acc | Dat
+  Case =
-    --   | PossStem  -- TODO: Stem where possessive suffixes attach?
+    Nom | Acc  -- Practical to have core cases as full strings
-       | Ill | Ine | Ela | All | Ade | Abl | Sub | Sup | Del -- Locatives
+  | Dat     -- Would be nice but is very regular, so skip it
-       | Cau  -- Causal-final 'for the purpose of, for the reason that'
+  | Sup        -- Depends on the word which stem it uses
-       | Ins  -- Instrumental
+  -- | All     -- Can have irregularities in suffix (k)
-       | Tra  -- Translative
+  | Ins | Tra  -- Different for vowels and consonants
  | OblStem ;  -- The rest of the cases are regular and attach to this stem
       -- | Ill | Ine | Ela  | Ade | Abl | Sub | Sup | Del -- Locatives
       -- | Cau  -- Causal-final 'for the purpose of, for the reason that'
       -- | Ins  -- Instrumental
       -- | Tra  -- Translative
       -- | Ess | Ter | For
       -- | Tem -- Temporal, e.g. hatkor ‘six o’clock’ (from hat ‘6’)
       ;
  SubjCase = SCNom | SCDat ; -- Limited set of subject cases
  Possessor = NoPoss | Poss Number Person ;
 oper
  -- Maps a case to the Case value used to look up a noun's s table
  -- (NounHun applies it as `cn.s ! det.n ! case2stem c`).
  -- Currently just the identity on Case, so it changes nothing yet.
  case2stem : Case -> Case = id Case ; -- TODO add stems and cases as separate types
  caseTable : (x1,_,_,_,_,_,_,_,_,_,_,_,_,_,x15 : Str) -> Case=>Str =
   \n,a,d,il,ine,el,al,ad,ab,sub,sup,del,ca,ins,tra -> table {
      Nom => n ;
      Acc => a ;
      Dat => d ;
      Ins => ins ;
      Tra => tra ;
      OblStem => init a ;
      Sup => sup ;
      Sub => sub ;
      Del => del ;
      Ill => il ;
      Ine => ine ;
      Ela => el ;
      All => al ;
      Ade => ad ;
      Abl => ab ;
-      Sub => sub ;
+      Cau => ca } ;
      Sup => sup ;
      Del => del ;
      Cau => ca ;
      Ins => ins ;
      Tra => tra } ;
  sc2case : SubjCase -> Case = \sc ->
    case sc of {
--- a/src/hungarian/ResHun.gf
+++ b/src/hungarian/ResHun.gf
@@ -13,10 +13,15 @@ resource ResHun = NounMorphoHun ** open Prelude, Predef in {
 -- Noun morphology is in NounMorphoHun
 oper
  -- Lincat of CN (see CatHun): a Noun extended with a separate field for
  -- relative-clause material. NounHun's RelCN stores the RS in rs instead of
  -- appending it to s, UseN/UseN2 set it to the empty string, and the
  -- determiner rule concatenates cn.s ++ cn.rs.
  CNoun : Type = Noun ** {
    -- Kept apart from s; indexed by the same Number and Case parameters.
    rs : Number => Case => Str ;
    } ;
  -- Fields shared by noun-phrase-like records; NounPhrase extends this type.
  BaseNP : Type = {
    agr : Person*Number ;  -- agreement features as a (person, number) pair
    objdef : ObjDef ;
    empty : Str ; -- standard trick for pro-drop
    h : Harm ;  -- NOTE(review): presumably the vowel-harmony class, as in Noun.h — confirm
    } ;
  NounPhrase : Type = BaseNP ** {
@@ -28,6 +33,7 @@ oper
    agr = <P3,Sg> ;
    objdef = Indef ;
    empty = [] ;
    h = H_e ;
    } ;
  indeclNP : Str -> NounPhrase = \s -> emptyNP ** {s = \\c => s} ;
@@ -37,6 +43,8 @@ oper
    n = n ;
    objdef = Def ;
    } ;
  -- Reference linearization for CN (used as `linref CN` in CatHun):
  -- the Sg Nom form of the noun followed by its rs field in Sg Nom.
  linCN : CNoun -> Str = \cn -> cn.s ! Sg ! Nom ++ cn.rs ! Sg ! Nom ;
 --------------------------------------------------------------------------------
 -- Pronouns