From 37d4d3672c46248a1e75874e19f084d9e4860de4 Mon Sep 17 00:00:00 2001 From: odanoburu Date: Tue, 11 Sep 2018 21:11:09 +0000 Subject: [PATCH 1/4] (Por) corrects edge cases in NumeralPor --- src/portuguese/NumeralPor.gf | 52 +++++++++++++++++++++++++----------- 1 file changed, 37 insertions(+), 15 deletions(-) diff --git a/src/portuguese/NumeralPor.gf b/src/portuguese/NumeralPor.gf index bb734f116..953d0ea60 100644 --- a/src/portuguese/NumeralPor.gf +++ b/src/portuguese/NumeralPor.gf @@ -1,9 +1,14 @@ concrete NumeralPor of Numeral = CatPor [Numeral,Digits] ** - open CommonRomance, ResRomance, MorphoPor, Prelude in { + open CommonRomance, ResRomance, MorphoPor, Prelude, Predef in { flags coding=utf8 ; + param + DForm = unit | teen | ten | hundred ; + lincat + --- cardinals are generally not inflected by gender, however 1 and 2 + --- are, as are the hundreds from 2 to 9 Digit = {s : DForm => CardOrd => Str} ; Sub10 = {s : DForm => CardOrd => Str ; n : Number} ; Sub100 = {s : CardOrd => Str ; n : Number} ; @@ -44,10 +49,11 @@ concrete NumeralPor of Numeral = CatPor [Numeral,Digits] ** "novecentos" "nono" "nonagésimo" "noningentésimo"; pot01 = - let um = (mkTal "um" "onze" "dez" "cento" "primeiro" + let um = (mkTal "um" "onze" "dez" "centos" "primeiro" "décimo" "centésimo").s in {s =\\f,g => case of { => "uma" ; + => "cento" ; _ => um ! f ! g } ; n = Sg @@ -66,20 +72,32 @@ concrete NumeralPor of Numeral = CatPor [Numeral,Digits] ** pot1as2 n = n ; pot2 d = let n = case d.n of { - Sg => mkTal [] [] [] "cem" [] [] "centésimo" ; - _ => d + Sg => cem ; + _ => d.s ! hundred } - in spl (n.s ! hundred) ; + in spl n ; pot2plus d e = {s = \\g => d.s ! hundred ! g ++ e_CardOrd g ++ e.s ! g ; n = Pl} ; pot2as3 n = n ; - pot3 n = spl (\\g => n.s ! NCard Masc ++ mil g) ; - pot3plus n m = {s = \\g => n.s ! NCard Masc - ++ mil g ++ e_CardOrd g - ++ m.s ! g ; - n = Pl} ; + pot3 n = + let n = case n.n of { + Sg => [] ; + _ => n.s ! 
NCard Masc + } ; + in spl (\\g => n ++ mil g) ; + pot3plus n m = + let n = case n.n of { + Sg => [] ; + _ => n.s ! NCard Masc + } ; + in {s = \\g => n ++ mil g + -- actually, 'e' only if m is exact hundred (pot2) or + -- lower + ++ e_CardOrd g + ++ m.s ! g ; + n = Pl} ; oper mkTal : (_,_,_,_,_,_,_ : Str) -> {s : DForm => CardOrd => Str} = @@ -88,8 +106,8 @@ concrete NumeralPor of Numeral = CatPor [Numeral,Digits] ** => dois ; => doze ; => vinte ; - => duzentos ; - => (regCard segundo) g n ; + => regCard (tk 1 duzentos) g Pl ; + => regCard segundo g n ; => (regCard "décimo") g n ++ (regCard segundo) g n ; => regCard vigesimo g n ; => regCard duocentesimo g n @@ -104,6 +122,13 @@ concrete NumeralPor of Numeral = CatPor [Numeral,Digits] ** n = Pl } ; + cem : CardOrd => Str ; + cem = \\co => + case co of { + NCard _ => "cem" ; + NOrd g n => regCard "centésimo" g n + } ; + mil : CardOrd -> Str = \g -> (mkTal "mil" [] [] [] "milésimo" [] []).s ! unit ! g ; @@ -112,9 +137,6 @@ concrete NumeralPor of Numeral = CatPor [Numeral,Digits] ** _ => [] } ; - param - DForm = unit | teen | ten | hundred ; - --- -- numerals as sequences of digits From 37169098ef7bfbfd8764957ce14cba2124a3ed63 Mon Sep 17 00:00:00 2001 From: odanoburu Date: Tue, 11 Sep 2018 23:19:09 -0300 Subject: [PATCH 2/4] (treebanks) add treebank with some numeral trees biased towards Portuguese edge cases, naturally --- treebanks/README | 1 + treebanks/numeral-trees.txt | 10 ++++++++++ 2 files changed, 11 insertions(+) create mode 100644 treebanks/numeral-trees.txt diff --git a/treebanks/README b/treebanks/README index d2da183ae..4c2feceff 100644 --- a/treebanks/README +++ b/treebanks/README @@ -7,6 +7,7 @@ Available treebanks: rgl-exx.txt -- an old text treebank for RGL copied from GF/test/exx-resource.gfs rgl-api-trees.txt -- the examples of the library synopsis in core RGL terms ud-rgl-trees.txt -- trees constructed from Universal Dependencies documentation by using words from the common RGL Lexicon + 
numeral-trees.txt -- trees representing a variety of numerals To run a treebank on a language: diff --git a/treebanks/numeral-trees.txt b/treebanks/numeral-trees.txt new file mode 100644 index 000000000..640a12743 --- /dev/null +++ b/treebanks/numeral-trees.txt @@ -0,0 +1,10 @@ +num (pot2as3 (pot2 pot01)) +num (pot2as3 (pot2plus pot01 (pot0as1 pot01))) +num (pot2as3 (pot2 (pot0 n2))) +num (pot2as3 (pot2plus (pot0 n2) (pot0as1 pot01))) +num (pot3 (pot1as2 (pot0as1 pot01))) +num (pot3plus (pot1as2 (pot0as1 pot01)) (pot1as2 (pot0as1 pot01))) +num (pot3 (pot1as2 (pot0as1 (pot0 n2)))) +num (pot3plus (pot1as2 (pot0as1 (pot0 n2))) (pot1as2 (pot0as1 pot01))) +num (pot3plus (pot1as2 (pot0as1 (pot0 n2))) (pot2 (pot0 n8))) +num (pot3plus (pot1as2 (pot0as1 (pot0 n2))) (pot2plus (pot0 n8) (pot1plus n3 (pot0 n2)))) From 6fc46c21763ba8ea04edfd7295545057f6632620 Mon Sep 17 00:00:00 2001 From: odanoburu Date: Wed, 12 Sep 2018 17:07:07 +0000 Subject: [PATCH 3/4] (Por) Numeral: create new oper and use it --- src/portuguese/NumeralPor.gf | 34 +++++++++++++++++++++++----------- 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/src/portuguese/NumeralPor.gf b/src/portuguese/NumeralPor.gf index 953d0ea60..eb3b1a057 100644 --- a/src/portuguese/NumeralPor.gf +++ b/src/portuguese/NumeralPor.gf @@ -60,43 +60,55 @@ concrete NumeralPor of Numeral = CatPor [Numeral,Digits] ** } ; pot0 d = {s = d.s ; n = Pl} ; + pot110 = spl (pot01.s ! ten) ; + pot111 = spl (pot01.s ! teen) ; + pot1to19 d = spl (d.s ! teen) ; + pot0as1 n = {s = n.s ! unit ; n = n.n} ; + pot1 d = spl (d.s ! ten) ; + pot1plus d e = {s = \\g => d.s ! ten ! g ++ e_CardOrd g ++ e.s ! unit ! g ; n = Pl} ; + pot1as2 n = n ; + pot2 d = let n = case d.n of { - Sg => cem ; + Sg => mkNumStr "cem" "centésimo" ; _ => d.s ! hundred } in spl n ; + pot2plus d e = {s = \\g => d.s ! hundred ! g ++ e_CardOrd g ++ e.s ! g ; n = Pl} ; + pot2as3 n = n ; + pot3 n = let n = case n.n of { Sg => [] ; _ => n.s ! 
NCard Masc } ; - in spl (\\g => n ++ mil g) ; + in spl (\\co => n ++ mil ! co) ; + pot3plus n m = let n = case n.n of { Sg => [] ; _ => n.s ! NCard Masc } ; - in {s = \\g => n ++ mil g + in {s = \\co => n ++ mil ! co -- actually, 'e' only if m is exact hundred (pot2) or -- lower - ++ e_CardOrd g - ++ m.s ! g ; + ++ e_CardOrd co + ++ m.s ! co ; n = Pl} ; oper @@ -122,15 +134,15 @@ concrete NumeralPor of Numeral = CatPor [Numeral,Digits] ** n = Pl } ; - cem : CardOrd => Str ; - cem = \\co => + mkNumStr : Str -> Str -> CardOrd => Str ; + mkNumStr cem centesimo = \\co => case co of { - NCard _ => "cem" ; - NOrd g n => regCard "centésimo" g n + NCard _ => cem ; + NOrd g n => regCard centesimo g n } ; - mil : CardOrd -> Str = \g -> - (mkTal "mil" [] [] [] "milésimo" [] []).s ! unit ! g ; + mil : CardOrd => Str ; + mil = mkNumStr "mil" "milésimo" ; e_CardOrd : CardOrd -> Str = \co -> case co of { NCard _ => "e" ; From 625bf5485f222ea0d53c8576eaac557dcfa1e782 Mon Sep 17 00:00:00 2001 From: odanoburu Date: Wed, 12 Sep 2018 18:02:49 +0000 Subject: [PATCH 4/4] (Por) Numeral: add spacing and handle case of millions (not in RGL, only in ParseExtend) --- src/portuguese/MorphoPor.gf | 3 +-- src/portuguese/NumeralPor.gf | 32 ++++++++++++++++++-------------- 2 files changed, 19 insertions(+), 16 deletions(-) diff --git a/src/portuguese/MorphoPor.gf b/src/portuguese/MorphoPor.gf index 211e0659c..54b31b10b 100644 --- a/src/portuguese/MorphoPor.gf +++ b/src/portuguese/MorphoPor.gf @@ -8,8 +8,7 @@ -- $ParadigmsPor$, which gives a higher-level access to this module. 
resource MorphoPor = CommonRomance, ResPor ** - open PhonoPor, Prelude, Predef, - CatPor in { + open PhonoPor, Prelude, Predef, CatPor in { flags optimize=all ; coding=utf8 ; diff --git a/src/portuguese/NumeralPor.gf b/src/portuguese/NumeralPor.gf index eb3b1a057..0e606a394 100644 --- a/src/portuguese/NumeralPor.gf +++ b/src/portuguese/NumeralPor.gf @@ -60,45 +60,45 @@ concrete NumeralPor of Numeral = CatPor [Numeral,Digits] ** } ; pot0 d = {s = d.s ; n = Pl} ; - + pot110 = spl (pot01.s ! ten) ; - + pot111 = spl (pot01.s ! teen) ; - + pot1to19 d = spl (d.s ! teen) ; - + pot0as1 n = {s = n.s ! unit ; n = n.n} ; - + pot1 d = spl (d.s ! ten) ; - + pot1plus d e = {s = \\g => d.s ! ten ! g ++ e_CardOrd g ++ e.s ! unit ! g ; n = Pl} ; - + pot1as2 n = n ; - + pot2 d = let n = case d.n of { Sg => mkNumStr "cem" "centésimo" ; _ => d.s ! hundred } in spl n ; - + pot2plus d e = {s = \\g => d.s ! hundred ! g ++ e_CardOrd g ++ e.s ! g ; n = Pl} ; - + pot2as3 n = n ; - + pot3 n = let n = case n.n of { Sg => [] ; _ => n.s ! NCard Masc } ; in spl (\\co => n ++ mil ! co) ; - + pot3plus n m = let n = case n.n of { Sg => [] ; @@ -126,8 +126,12 @@ concrete NumeralPor of Numeral = CatPor [Numeral,Digits] ** } } ; - regCard : Str -> Gender -> Number -> Str = \vigesimo -> - pronForms (adjPreto vigesimo) ; + regCard : Str -> Gender -> Number -> Str ; + regCard vigesimo = case vigesimo of { + -- handles the milhão case (ParseExtend module): gender-invariant, plural milhões in both genders (assumes genNumForms order masc-sg, fem-sg, masc-pl, fem-pl) + milh + "ão" => \g, n -> genNumForms vigesimo vigesimo (milh + "ões") (milh + "ões") ! g ! n; + _ => pronForms (adjPreto vigesimo) + } ; spl : (CardOrd => Str) -> {s : CardOrd => Str ; n : Number} = \s -> { s = s ;