1
0
forked from GitHub/gf-rgl

Merge pull request #16 from odanoburu/por-num

add small treebank for numerals and improve NumeralPor
This commit is contained in:
Inari Listenmaa
2018-09-17 09:19:18 +02:00
committed by GitHub
4 changed files with 69 additions and 21 deletions

View File

@@ -8,8 +8,7 @@
-- $ParadigmsPor$, which gives a higher-level access to this module. -- $ParadigmsPor$, which gives a higher-level access to this module.
resource MorphoPor = CommonRomance, ResPor ** resource MorphoPor = CommonRomance, ResPor **
open PhonoPor, Prelude, Predef, open PhonoPor, Prelude, Predef, CatPor in {
CatPor in {
flags optimize=all ; flags optimize=all ;
coding=utf8 ; coding=utf8 ;

View File

@@ -1,9 +1,14 @@
concrete NumeralPor of Numeral = CatPor [Numeral,Digits] ** concrete NumeralPor of Numeral = CatPor [Numeral,Digits] **
open CommonRomance, ResRomance, MorphoPor, Prelude in { open CommonRomance, ResRomance, MorphoPor, Prelude, Predef in {
flags coding=utf8 ; flags coding=utf8 ;
param
DForm = unit | teen | ten | hundred ;
lincat lincat
--- cardinals are generally not inflected for gender; however, 1 and 2
--- are, as are the hundreds from 200 to 900
Digit = {s : DForm => CardOrd => Str} ; Digit = {s : DForm => CardOrd => Str} ;
Sub10 = {s : DForm => CardOrd => Str ; n : Number} ; Sub10 = {s : DForm => CardOrd => Str ; n : Number} ;
Sub100 = {s : CardOrd => Str ; n : Number} ; Sub100 = {s : CardOrd => Str ; n : Number} ;
@@ -44,42 +49,67 @@ concrete NumeralPor of Numeral = CatPor [Numeral,Digits] **
"novecentos" "nono" "nonagésimo" "noningentésimo"; "novecentos" "nono" "nonagésimo" "noningentésimo";
pot01 = pot01 =
let um = (mkTal "um" "onze" "dez" "cento" "primeiro" let um = (mkTal "um" "onze" "dez" "centos" "primeiro"
"décimo" "centésimo").s in "décimo" "centésimo").s in
{s =\\f,g => case <f,g> of { {s =\\f,g => case <f,g> of {
<unit, NCard Fem> => "uma" ; <unit, NCard Fem> => "uma" ;
<hundred, NCard _> => "cento" ;
_ => um ! f ! g _ => um ! f ! g
} ; } ;
n = Sg n = Sg
} ; } ;
pot0 d = {s = d.s ; n = Pl} ; pot0 d = {s = d.s ; n = Pl} ;
pot110 = spl (pot01.s ! ten) ; pot110 = spl (pot01.s ! ten) ;
pot111 = spl (pot01.s ! teen) ; pot111 = spl (pot01.s ! teen) ;
pot1to19 d = spl (d.s ! teen) ; pot1to19 d = spl (d.s ! teen) ;
pot0as1 n = {s = n.s ! unit ; n = n.n} ; pot0as1 n = {s = n.s ! unit ; n = n.n} ;
pot1 d = spl (d.s ! ten) ; pot1 d = spl (d.s ! ten) ;
pot1plus d e = pot1plus d e =
{s = \\g => d.s ! ten ! g {s = \\g => d.s ! ten ! g
++ e_CardOrd g ++ e.s ! unit ! g ; ++ e_CardOrd g ++ e.s ! unit ! g ;
n = Pl} ; n = Pl} ;
pot1as2 n = n ; pot1as2 n = n ;
pot2 d = pot2 d =
let n = case d.n of { let n = case d.n of {
Sg => mkTal [] [] [] "cem" [] [] "centésimo" ; Sg => mkNumStr "cem" "centésimo" ;
_ => d _ => d.s ! hundred
} }
in spl (n.s ! hundred) ; in spl n ;
pot2plus d e = pot2plus d e =
{s = \\g => d.s ! hundred ! g {s = \\g => d.s ! hundred ! g
++ e_CardOrd g ++ e.s ! g ; ++ e_CardOrd g ++ e.s ! g ;
n = Pl} ; n = Pl} ;
pot2as3 n = n ; pot2as3 n = n ;
pot3 n = spl (\\g => n.s ! NCard Masc ++ mil g) ;
pot3plus n m = {s = \\g => n.s ! NCard Masc pot3 n =
++ mil g ++ e_CardOrd g let n = case n.n of {
++ m.s ! g ; Sg => [] ;
n = Pl} ; _ => n.s ! NCard Masc
} ;
in spl (\\co => n ++ mil ! co) ;
pot3plus n m =
let n = case n.n of {
Sg => [] ;
_ => n.s ! NCard Masc
} ;
in {s = \\co => n ++ mil ! co
-- NOTE: strictly, 'e' should be used only if m is an exact hundred (pot2)
-- or a number below one hundred
++ e_CardOrd co
++ m.s ! co ;
n = Pl} ;
oper oper
mkTal : (_,_,_,_,_,_,_ : Str) -> {s : DForm => CardOrd => Str} = mkTal : (_,_,_,_,_,_,_ : Str) -> {s : DForm => CardOrd => Str} =
@@ -88,33 +118,41 @@ concrete NumeralPor of Numeral = CatPor [Numeral,Digits] **
<unit, NCard _> => dois ; <unit, NCard _> => dois ;
<teen, NCard _> => doze ; <teen, NCard _> => doze ;
<ten, NCard _> => vinte ; <ten, NCard _> => vinte ;
<hundred, NCard _> => duzentos ; <hundred, NCard g> => regCard (tk 1 duzentos) g Pl ;
<unit, NOrd g n> => (regCard segundo) g n ; <unit, NOrd g n> => regCard segundo g n ;
<teen, NOrd g n> => (regCard "décimo") g n ++ (regCard segundo) g n ; <teen, NOrd g n> => (regCard "décimo") g n ++ (regCard segundo) g n ;
<ten, NOrd g n> => regCard vigesimo g n ; <ten, NOrd g n> => regCard vigesimo g n ;
<hundred, NOrd g n> => regCard duocentesimo g n <hundred, NOrd g n> => regCard duocentesimo g n
} }
} ; } ;
regCard : Str -> Gender -> Number -> Str = \vigesimo -> regCard : Str -> Gender -> Number -> Str ;
pronForms (adjPreto vigesimo) ; regCard vigesimo = case vigesimo of {
-- special case for "milhão", whose plural is "milhões" (needed in the ParseExtend module)
milh + "ão" => \g, n -> genNumForms vigesimo vigesimo (milh + "ões") vigesimo ! g ! n;
_ => pronForms (adjPreto vigesimo)
} ;
spl : (CardOrd => Str) -> {s : CardOrd => Str ; n : Number} = \s -> { spl : (CardOrd => Str) -> {s : CardOrd => Str ; n : Number} = \s -> {
s = s ; s = s ;
n = Pl n = Pl
} ; } ;
mil : CardOrd -> Str = \g -> mkNumStr : Str -> Str -> CardOrd => Str ;
(mkTal "mil" [] [] [] "milésimo" [] []).s ! unit ! g ; mkNumStr cem centesimo = \\co =>
case co of {
NCard _ => cem ;
NOrd g n => regCard centesimo g n
} ;
mil : CardOrd => Str ;
mil = mkNumStr "mil" "milésimo" ;
e_CardOrd : CardOrd -> Str = \co -> case co of { e_CardOrd : CardOrd -> Str = \co -> case co of {
NCard _ => "e" ; NCard _ => "e" ;
_ => [] _ => []
} ; } ;
param
DForm = unit | teen | ten | hundred ;
--- ---
-- numerals as sequences of digits -- numerals as sequences of digits

View File

@@ -7,6 +7,7 @@ Available treebanks:
rgl-exx.txt -- an old text treebank for RGL copied from GF/test/exx-resource.gfs rgl-exx.txt -- an old text treebank for RGL copied from GF/test/exx-resource.gfs
rgl-api-trees.txt -- the examples of the library synopsis in core RGL terms rgl-api-trees.txt -- the examples of the library synopsis in core RGL terms
ud-rgl-trees.txt -- trees constructed from Universal Dependencies documentation by using words from the common RGL Lexicon ud-rgl-trees.txt -- trees constructed from Universal Dependencies documentation by using words from the common RGL Lexicon
numeral-trees.txt -- trees representing a variety of numerals
To run a treebank on a language: To run a treebank on a language:

View File

@@ -0,0 +1,10 @@
num (pot2as3 (pot2 pot01))
num (pot2as3 (pot2plus pot01 (pot0as1 pot01)))
num (pot2as3 (pot2 (pot0 n2)))
num (pot2as3 (pot2plus (pot0 n2) (pot0as1 pot01)))
num (pot3 (pot1as2 (pot0as1 pot01)))
num (pot3plus (pot1as2 (pot0as1 pot01)) (pot1as2 (pot0as1 pot01)))
num (pot3 (pot1as2 (pot0as1 (pot0 n2))))
num (pot3plus (pot1as2 (pot0as1 (pot0 n2))) (pot1as2 (pot0as1 pot01)))
num (pot3plus (pot1as2 (pot0as1 (pot0 n2))) (pot2 (pot0 n8)))
num (pot3plus (pot1as2 (pot0as1 (pot0 n2))) (pot2plus (pot0 n8) (pot1plus n3 (pot0 n2))))