From 3f30e0946e9f510b78bc370d90ddad1e9d61c6bb Mon Sep 17 00:00:00 2001 From: Inari Listenmaa Date: Fri, 27 Sep 2019 15:22:43 +0200 Subject: [PATCH] (Som) WIP better handling of multiple modifiers and numerals --- src/somali/NounSom.gf | 52 +++++++++++++++++++++++----------- src/somali/NumeralSom.gf | 17 +++++++---- src/somali/ParamSom.gf | 8 ++++++ src/somali/ResSom.gf | 8 ++++-- src/somali/StructuralSom.gf | 3 +- src/somali/unittest/num.gftest | 31 ++++++++++++++++++-- 6 files changed, 91 insertions(+), 28 deletions(-) diff --git a/src/somali/NounSom.gf b/src/somali/NounSom.gf index 20fd9719..2bc9db63 100644 --- a/src/somali/NounSom.gf +++ b/src/somali/NounSom.gf @@ -13,9 +13,9 @@ concrete NounSom of Noun = CatSom ** open ResSom, Prelude in { a = getAgr det.n (gender cn) } where { sTable : Case => Str = \\c => let nfc : {nf : NForm ; c : Case} = - case of { + case of { -- Numbers - => {nf=Numerative ; c=c} ; + => {nf=Numerative ; c=c} ; -- special form for fem. nouns <_,Nom,False,Indefinite,Sg> => {nf=NomSg ; c=c} ; @@ -30,13 +30,24 @@ concrete NounSom of Noun = CatSom ** open ResSom, Prelude in { _ => {nf=Def det.n ; c=c} } ; art = gda2da cn.gda ! det.n ; - num = case det.isNum of {True => Sg ; _ => det.n} ; + num = case isNum det.numtype of {True => Sg ; _ => det.n} ; dt : {pref,s : Str} = case of { - => {s = [] ; pref = det.s ! art ! nfc.c} ; -- determiner comes before CN - <_, True,_> => {pref = [] ; s = det.sp ! gender cn ! nfc.c} ; -- CN has undergone ComplN2 and is already quantified - <_,_, True> => {pref = [] ; s = BIND ++ det.shortPoss ! art} ; - _ => {pref = [] ; s = det.s ! art ! nfc.c} + -- Det is a cardinal number. The number is the head of the NP, + -- and CN becomes its modifier. If CN has modifiers of its own, + -- we insert the conjunction "oo" between the number and the CN. + => + let oo = case det.numtype of {Compound => "oo" ; _ => []} + in {s = [] ; pref = det.s ! art ! nfc.c ++ oo} ; + + -- CN has undergone ComplN2 and is already quantified + <_,True,_> => {pref = [] ; s = det.sp ! gender cn ! nfc.c} ; + + -- CN is e.g. a kinship term and takes short possessive + <_,_,True> => {pref = [] ; s = BIND ++ det.shortPoss ! art} ; + + -- Default case + _ => {pref = [] ; s = det.s ! art ! nfc.c} } ; in dt.pref -- if det is numeral ++ cn.s ! nfc.nf @@ -120,23 +131,19 @@ concrete NounSom of Noun = CatSom ** open ResSom, Prelude in { -- : Quant -> Num -> Det ; DetQuant quant num = let indep = Hal in quant ** { s = \\da,c => - case num.isNum of { + case isNum num.numtype of { True => num.s ! indep ++ quant.s ! num.da ! c ++ num.thousand ; False => num.s ! indep ++ quant.s ! da ! c ++ num.thousand } ; - sp = \\g,c => case of { -- TODO check what happens when num.isNum + sp = \\g,c => case of { => num.s ! indep ++ quant.sp ! SgMasc ! c ++ num.thousand ; => num.s ! indep ++ quant.sp ! SgFem ! c ++ num.thousand ; -- Independent form uses plural morpheme, not gender-flipped allomorph => num.s ! indep ++ quant.sp ! PlInv ! c ++ num.thousand } ; - isNum = num.isNum ; + numtype = num.numtype ; n = num.n ; shortPoss = \\da => quant.shortPoss ! da ++ num.s ! indep } ; - -- d = case of { - -- => Numerative ; - -- => Def num.n quant.v ; - -- => Indef num.n } ; -- : Quant -> Num -> Ord -> Det ; -- these five best DetQuantOrd quant num ord = @@ -157,7 +164,11 @@ concrete NounSom of Noun = CatSom ** open ResSom, Prelude in { NumPl = baseNum ** {n = Pl} ; -- : Card -> Num ; - NumCard card = card ** {isNum = True} ; + NumCard card = card ** { + numtype = case card.hasThousand of { + True => Compound ; + False => Basic } + } ; -- : Digits -> Card ; -- NumDigits dig = { s = dig.s ! NCard ; n = dig.n } ; @@ -245,8 +256,11 @@ concrete NounSom of Noun = CatSom ** open ResSom, Prelude in { Use3N3 n3 = lin N2 n3 ; -- : AP -> CN -> CN AdjCN ap cn = cn ** { - s = table { NomSg => cn.s ! Indef Sg ; -- When an adjective is added, noun loses case marker. - x => cn.s ! x } ; + s = table { -- Add oo after Numerative only if this is CN's first modifier. + Numerative => cn.s ! Numerative + ++ andConj Indefinite (notB cn.hasMod) ; + NomSg => cn.s ! Indef Sg ; -- Add adj -> noun loses case marker + nf => cn.s ! nf } ; mod = \\st,n,c => cn.mod ! st ! n ! Abs -- If there was something before, it is now in Abs ++ andConj st cn.hasMod -- If the sentence is already modified, any new modifier needs to be introduced with conjunction @@ -256,6 +270,10 @@ concrete NounSom of Noun = CatSom ** open ResSom, Prelude in { -- : CN -> RS -> CN ; RelCN cn rs = cn ** { + s = table { + Numerative => cn.s ! Numerative ++ andConj Indefinite (notB cn.hasMod) ; + NomSg => cn.s ! Indef Sg ; -- Add adj -> noun loses case marker + nf => cn.s ! nf } ; mod = \\st,n,c => --what to do with subject case if there's both adj and RS? cn.mod ! st ! n ! Abs ++ andConj st cn.hasMod diff --git a/src/somali/NumeralSom.gf b/src/somali/NumeralSom.gf index b674f132..0ace5e5d 100644 --- a/src/somali/NumeralSom.gf +++ b/src/somali/NumeralSom.gf @@ -40,6 +40,7 @@ lincat Sub10, Sub100, Sub1000, Sub1000000 = { s : DForm => Str ; thousand : Str ; -- TODO figure out if this really works so + hasThousand : Bool ; ord : Str ; da : DefArticle ; n : Number @@ -64,25 +65,28 @@ lin n7 = mkNum2 "toddoba" "toddobaatan" ; lin n8 = mkNum2Masc "siddeed" "siddeetan" ; lin n9 = mkNum2Masc "sagaal" "sagaashan" ; -lin pot01 = n1.unit ** {n = Sg ; thousand = []} ; +lin pot01 = n1.unit ** {n = Sg ; thousand = [] ; hasThousand = False} ; -lin pot0 d = d.unit ** {n = Pl ; thousand = []} ; +lin pot0 d = d.unit ** {n = Pl ; thousand = [] ; hasThousand = False} ; lin pot110 = n1.ten ** { s = \\df => n1.ten.s ; thousand = [] ; + hasThousand = False ; n = Pl } ; lin pot111 = { s = \\_ => "koob iyo" ++ n1.ten.s ; ord = "koob iyo" ++ n1.ten.ord ; thousand = [] ; + hasThousand = False ; da = M KA ; n = Pl } ; lin pot1to19 d = { s = \\_ => d.unit.s ! Hal ++ "iyo" ++ n1.ten.s ; thousand = [] ; + hasThousand = False ; ord = d.unit.s ! Hal ++ "iyo" ++ n1.ten.ord ; da = M KA ; n = Pl @@ -91,26 +95,31 @@ lin pot0as1 n = n ; lin pot1 d = d.ten ** { s = \\df => d.ten.s ; thousand = [] ; + hasThousand = False ; n = Pl } ; lin pot1plus d e = d.ten ** { s = \\b => e.s ! b ++ "iyo" ++ d.ten.s ; ord = e.s ! Hal ++ "iyo" ++ d.ten.ord ; thousand = [] ; + hasThousand = False ; n = Pl ; } ; lin pot1as2 n = n ; lin pot2 d = d ** { thousand = "boqol" ; + hasThousand = True ; ord = d.s ! Hal ++ "boqlaad" } ; lin pot2plus d e = d ** { thousand = "boqol iyo" ++ e.s ! Hal ; + hasThousand = True ; ord = d.s ! Hal ++ "boqol iyo" ++ e.ord ; n = Pl} ; lin pot2as3 n = n ; lin pot3 n = n ** { thousand = n.thousand ++ "kun" ; + hasThousand = True ; ord = n.s ! Hal ++ "kunaad" ; n = Pl } ; @@ -119,9 +128,7 @@ lin pot3plus n m = n ** { ord = n.ord ++ "kun iyo" ++ m.ord ; n = Pl} ; ---TODO: --- two thousand small cats --- => laba kun oo bisadood oo yar (kun and bisadood are both attributes) + ---------------------------------------------------------------------------- lincat Dig = TDigit ; diff --git a/src/somali/ParamSom.gf b/src/somali/ParamSom.gf index 1c93f681..b0c12e9a 100644 --- a/src/somali/ParamSom.gf +++ b/src/somali/ParamSom.gf @@ -210,6 +210,14 @@ param CardOrd = NOrd | NCard ; + -- to know whether to put oo in between numeral and CN + NumType = NoNum | Basic | Compound ; + +oper + isNum : NumType -> Bool = \nt -> case nt of { + NoNum => False ; + _ => True + } ; -------------------------------------------------------------------------------- -- Adjectives diff --git a/src/somali/ResSom.gf b/src/somali/ResSom.gf index c1bc1b9d..3a90d39b 100644 --- a/src/somali/ResSom.gf +++ b/src/somali/ResSom.gf @@ -279,7 +279,7 @@ oper Determiner : Type = BaseQuant ** { sp : Gender => Case => Str ; n : Number ; - isNum : Bool ; -- placement in NP + whether to choose Numerative from CN + numtype : NumType ; -- placement in NP + whether to choose Numerative from CN } ; Quant : Type = BaseQuant ** { @@ -289,6 +289,7 @@ oper BaseNum : Type = { s : DForm => Str ; -- independent or attribute thousand : Str ; -- TODO check where possessive suffix goes + hasThousand : Bool ; da : DefArticle ; n : Number } ; @@ -296,13 +297,14 @@ oper baseNum : Num = { s = \\_ => [] ; thousand = [] ; + hasThousand = False ; da = M KA ; n = Sg ; - isNum = False + numtype = NoNum } ; Num : Type = BaseNum ** { - isNum : Bool -- whether to choose Numerative as the value of NForm + numtype : NumType -- whether to choose Numerative as the value of NForm } ; Numeral : Type = BaseNum ** { diff --git a/src/somali/StructuralSom.gf b/src/somali/StructuralSom.gf index c66b9f9f..50ddf22d 100644 --- a/src/somali/StructuralSom.gf +++ b/src/somali/StructuralSom.gf @@ -68,7 +68,8 @@ lin much_Det = R.indefDet "" sg ; -} lin somePl_Det = { sp = \\_,_ => "qaar" ; - isPoss, isNum = False ; + isPoss = False ; + numtype = NoNum ; st = Definite ; -- NB. Indefinite means actually only IndefArt. n = Pl ; s = \\x,_ => BIND ++ defStems ! x ++ BIND ++ "a qaarkood" ; diff --git a/src/somali/unittest/num.gftest b/src/somali/unittest/num.gftest index d0e90762..efa0dc0e 100644 --- a/src/somali/unittest/num.gftest +++ b/src/somali/unittest/num.gftest @@ -1,8 +1,11 @@ +------------------------------- +-- Numerals with determiners -- +------------------------------- + -- LangEng: the two cats LangSom: laba BIND da bisadood Lang: PhrUtt NoPConj (UttNP (DetCN (DetQuant DefArt (NumCard (NumNumeral (num (pot2as3 (pot1as2 (pot0as1 (pot0 n2)))))))) (UseN cat_N))) NoVoc - -- LangEng: those three men LangSom: saddex BIND daas nin Lang: PhrUtt NoPConj (UttNP (DetCN (DetQuant that_Quant (NumCard (NumNumeral (num (pot2as3 (pot1as2 (pot0as1 (pot0 n3)))))))) (UseN man_N))) NoVoc @@ -25,4 +28,28 @@ Lang: PhrUtt NoPConj (UttS (UseCl (TTAnt TPres ASimul) PPos (PredVP (UsePron he_ -- LangEng: he is my first man LangSom: waa nin BIND kayg BIND a kowaad -Lang: PhrUtt NoPConj (UttS (UseCl (TTAnt TPres ASimul) PPos (PredVP (UsePron he_Pron) (UseComp (CompNP (DetCN (DetQuantOrd (PossPron i_Pron) NumSg (OrdNumeral (num (pot2as3 (pot1as2 (pot0as1 pot01)))))) (UseN man_N))))))) NoVoc \ No newline at end of file +Lang: PhrUtt NoPConj (UttS (UseCl (TTAnt TPres ASimul) PPos (PredVP (UsePron he_Pron) (UseComp (CompNP (DetCN (DetQuantOrd (PossPron i_Pron) NumSg (OrdNumeral (num (pot2as3 (pot1as2 (pot0as1 pot01)))))) (UseN man_N))))))) NoVoc + +-------------------------------------- +-- Numerals with multiple modifiers -- +-------------------------------------- + +-- LangEng: two cats +LangSom: laba bisadood +Lang: PhrUtt NoPConj (UttNP (DetCN (DetQuant IndefArt (NumCard (NumNumeral (num (pot2as3 (pot1as2 (pot0as1 (pot0 n2)))))))) (UseN cat_N))) NoVoc + +-- LangEng: two small cats +LangSom: laba bisadood oo yar +Lang: PhrUtt NoPConj (UttNP (DetCN (DetQuant IndefArt (NumCard (NumNumeral (num (pot2as3 (pot1as2 (pot0as1 (pot0 n2)))))))) (AdjCN (PositA small_A) (UseN cat_N)))) NoVoc + +-- LangEng: two small cats that have meat +LangSom: laba bisadood oo yar oo hilib leh +Lang: PhrUtt NoPConj (UttNP (DetCN (DetQuant IndefArt (NumCard (NumNumeral (num (pot2as3 (pot1as2 (pot0as1 (pot0 n2)))))))) (RelCN (AdjCN (PositA small_A) (UseN cat_N)) (UseRCl (TTAnt TPres ASimul) PPos (RelVP IdRP (ComplSlash (SlashV2a have_V2) (MassNP (UseN meat_N)))))))) NoVoc + +-- LangEng: two thousand cats +LangSom: laba kun oo bisadood +Lang: PhrUtt NoPConj (UttNP (DetCN (DetQuant IndefArt (NumCard (NumNumeral (num (pot3 (pot1as2 (pot0as1 (pot0 n2)))))))) (UseN cat_N))) NoVoc + +-- LangEng: two thousand small cats +LangSom: laba kun oo bisadood oo yar +Lang: PhrUtt NoPConj (UttNP (DetCN (DetQuant IndefArt (NumCard (NumNumeral (num (pot3 (pot1as2 (pot0as1 (pot0 n2)))))))) (AdjCN (PositA small_A) (UseN cat_N)))) NoVoc