forked from GitHub/gf-rgl
Merge pull request #16 from odanoburu/por-num
add small treebank for numerals and improve NumeralPor
This commit is contained in:
@@ -8,8 +8,7 @@
|
|||||||
-- $ParadigmsPor$, which gives a higher-level access to this module.
|
-- $ParadigmsPor$, which gives a higher-level access to this module.
|
||||||
|
|
||||||
resource MorphoPor = CommonRomance, ResPor **
|
resource MorphoPor = CommonRomance, ResPor **
|
||||||
open PhonoPor, Prelude, Predef,
|
open PhonoPor, Prelude, Predef, CatPor in {
|
||||||
CatPor in {
|
|
||||||
|
|
||||||
flags optimize=all ;
|
flags optimize=all ;
|
||||||
coding=utf8 ;
|
coding=utf8 ;
|
||||||
|
|||||||
@@ -1,9 +1,14 @@
|
|||||||
concrete NumeralPor of Numeral = CatPor [Numeral,Digits] **
|
concrete NumeralPor of Numeral = CatPor [Numeral,Digits] **
|
||||||
open CommonRomance, ResRomance, MorphoPor, Prelude in {
|
open CommonRomance, ResRomance, MorphoPor, Prelude, Predef in {
|
||||||
|
|
||||||
flags coding=utf8 ;
|
flags coding=utf8 ;
|
||||||
|
|
||||||
|
param
|
||||||
|
DForm = unit | teen | ten | hundred ;
|
||||||
|
|
||||||
lincat
|
lincat
|
||||||
|
--- cardinals are generally not inflected for gender; however, 1 and 2
|
||||||
|
--- are, as are the hundreds from 200 to 900
|
||||||
Digit = {s : DForm => CardOrd => Str} ;
|
Digit = {s : DForm => CardOrd => Str} ;
|
||||||
Sub10 = {s : DForm => CardOrd => Str ; n : Number} ;
|
Sub10 = {s : DForm => CardOrd => Str ; n : Number} ;
|
||||||
Sub100 = {s : CardOrd => Str ; n : Number} ;
|
Sub100 = {s : CardOrd => Str ; n : Number} ;
|
||||||
@@ -44,42 +49,67 @@ concrete NumeralPor of Numeral = CatPor [Numeral,Digits] **
|
|||||||
"novecentos" "nono" "nonagésimo" "noningentésimo";
|
"novecentos" "nono" "nonagésimo" "noningentésimo";
|
||||||
|
|
||||||
pot01 =
|
pot01 =
|
||||||
let um = (mkTal "um" "onze" "dez" "cento" "primeiro"
|
let um = (mkTal "um" "onze" "dez" "centos" "primeiro"
|
||||||
"décimo" "centésimo").s in
|
"décimo" "centésimo").s in
|
||||||
{s =\\f,g => case <f,g> of {
|
{s =\\f,g => case <f,g> of {
|
||||||
<unit, NCard Fem> => "uma" ;
|
<unit, NCard Fem> => "uma" ;
|
||||||
|
<hundred, NCard _> => "cento" ;
|
||||||
_ => um ! f ! g
|
_ => um ! f ! g
|
||||||
} ;
|
} ;
|
||||||
n = Sg
|
n = Sg
|
||||||
} ;
|
} ;
|
||||||
|
|
||||||
pot0 d = {s = d.s ; n = Pl} ;
|
pot0 d = {s = d.s ; n = Pl} ;
|
||||||
|
|
||||||
pot110 = spl (pot01.s ! ten) ;
|
pot110 = spl (pot01.s ! ten) ;
|
||||||
|
|
||||||
pot111 = spl (pot01.s ! teen) ;
|
pot111 = spl (pot01.s ! teen) ;
|
||||||
|
|
||||||
pot1to19 d = spl (d.s ! teen) ;
|
pot1to19 d = spl (d.s ! teen) ;
|
||||||
|
|
||||||
pot0as1 n = {s = n.s ! unit ; n = n.n} ;
|
pot0as1 n = {s = n.s ! unit ; n = n.n} ;
|
||||||
|
|
||||||
pot1 d = spl (d.s ! ten) ;
|
pot1 d = spl (d.s ! ten) ;
|
||||||
|
|
||||||
pot1plus d e =
|
pot1plus d e =
|
||||||
{s = \\g => d.s ! ten ! g
|
{s = \\g => d.s ! ten ! g
|
||||||
++ e_CardOrd g ++ e.s ! unit ! g ;
|
++ e_CardOrd g ++ e.s ! unit ! g ;
|
||||||
n = Pl} ;
|
n = Pl} ;
|
||||||
|
|
||||||
pot1as2 n = n ;
|
pot1as2 n = n ;
|
||||||
|
|
||||||
pot2 d =
|
pot2 d =
|
||||||
let n = case d.n of {
|
let n = case d.n of {
|
||||||
Sg => mkTal [] [] [] "cem" [] [] "centésimo" ;
|
Sg => mkNumStr "cem" "centésimo" ;
|
||||||
_ => d
|
_ => d.s ! hundred
|
||||||
}
|
}
|
||||||
in spl (n.s ! hundred) ;
|
in spl n ;
|
||||||
|
|
||||||
pot2plus d e =
|
pot2plus d e =
|
||||||
{s = \\g => d.s ! hundred ! g
|
{s = \\g => d.s ! hundred ! g
|
||||||
++ e_CardOrd g ++ e.s ! g ;
|
++ e_CardOrd g ++ e.s ! g ;
|
||||||
n = Pl} ;
|
n = Pl} ;
|
||||||
|
|
||||||
pot2as3 n = n ;
|
pot2as3 n = n ;
|
||||||
pot3 n = spl (\\g => n.s ! NCard Masc ++ mil g) ;
|
|
||||||
pot3plus n m = {s = \\g => n.s ! NCard Masc
|
pot3 n =
|
||||||
++ mil g ++ e_CardOrd g
|
let n = case n.n of {
|
||||||
++ m.s ! g ;
|
Sg => [] ;
|
||||||
n = Pl} ;
|
_ => n.s ! NCard Masc
|
||||||
|
} ;
|
||||||
|
in spl (\\co => n ++ mil ! co) ;
|
||||||
|
|
||||||
|
pot3plus n m =
|
||||||
|
let n = case n.n of {
|
||||||
|
Sg => [] ;
|
||||||
|
_ => n.s ! NCard Masc
|
||||||
|
} ;
|
||||||
|
in {s = \\co => n ++ mil ! co
|
||||||
|
-- FIXME: 'e' should follow 'mil' only if m is an exact hundred (pot2) or
|
||||||
|
-- lower than a hundred; for cardinals it is currently always inserted
|
||||||
|
++ e_CardOrd co
|
||||||
|
++ m.s ! co ;
|
||||||
|
n = Pl} ;
|
||||||
|
|
||||||
oper
|
oper
|
||||||
mkTal : (_,_,_,_,_,_,_ : Str) -> {s : DForm => CardOrd => Str} =
|
mkTal : (_,_,_,_,_,_,_ : Str) -> {s : DForm => CardOrd => Str} =
|
||||||
@@ -88,33 +118,41 @@ concrete NumeralPor of Numeral = CatPor [Numeral,Digits] **
|
|||||||
<unit, NCard _> => dois ;
|
<unit, NCard _> => dois ;
|
||||||
<teen, NCard _> => doze ;
|
<teen, NCard _> => doze ;
|
||||||
<ten, NCard _> => vinte ;
|
<ten, NCard _> => vinte ;
|
||||||
<hundred, NCard _> => duzentos ;
|
<hundred, NCard g> => regCard (tk 1 duzentos) g Pl ;
|
||||||
<unit, NOrd g n> => (regCard segundo) g n ;
|
<unit, NOrd g n> => regCard segundo g n ;
|
||||||
<teen, NOrd g n> => (regCard "décimo") g n ++ (regCard segundo) g n ;
|
<teen, NOrd g n> => (regCard "décimo") g n ++ (regCard segundo) g n ;
|
||||||
<ten, NOrd g n> => regCard vigesimo g n ;
|
<ten, NOrd g n> => regCard vigesimo g n ;
|
||||||
<hundred, NOrd g n> => regCard duocentesimo g n
|
<hundred, NOrd g n> => regCard duocentesimo g n
|
||||||
}
|
}
|
||||||
} ;
|
} ;
|
||||||
|
|
||||||
regCard : Str -> Gender -> Number -> Str = \vigesimo ->
|
regCard : Str -> Gender -> Number -> Str ;
|
||||||
pronForms (adjPreto vigesimo) ;
|
regCard vigesimo = case vigesimo of {
|
||||||
|
-- handles the irregular plural "milhão" -> "milhões" (used in the ParseExtend module)
|
||||||
|
milh + "ão" => \g, n -> genNumForms vigesimo vigesimo (milh + "ões") vigesimo ! g ! n;
|
||||||
|
_ => pronForms (adjPreto vigesimo)
|
||||||
|
} ;
|
||||||
|
|
||||||
spl : (CardOrd => Str) -> {s : CardOrd => Str ; n : Number} = \s -> {
|
spl : (CardOrd => Str) -> {s : CardOrd => Str ; n : Number} = \s -> {
|
||||||
s = s ;
|
s = s ;
|
||||||
n = Pl
|
n = Pl
|
||||||
} ;
|
} ;
|
||||||
|
|
||||||
mil : CardOrd -> Str = \g ->
|
mkNumStr : Str -> Str -> CardOrd => Str ;
|
||||||
(mkTal "mil" [] [] [] "milésimo" [] []).s ! unit ! g ;
|
mkNumStr cem centesimo = \\co =>
|
||||||
|
case co of {
|
||||||
|
NCard _ => cem ;
|
||||||
|
NOrd g n => regCard centesimo g n
|
||||||
|
} ;
|
||||||
|
|
||||||
|
mil : CardOrd => Str ;
|
||||||
|
mil = mkNumStr "mil" "milésimo" ;
|
||||||
|
|
||||||
e_CardOrd : CardOrd -> Str = \co -> case co of {
|
e_CardOrd : CardOrd -> Str = \co -> case co of {
|
||||||
NCard _ => "e" ;
|
NCard _ => "e" ;
|
||||||
_ => []
|
_ => []
|
||||||
} ;
|
} ;
|
||||||
|
|
||||||
param
|
|
||||||
DForm = unit | teen | ten | hundred ;
|
|
||||||
|
|
||||||
---
|
---
|
||||||
-- numerals as sequences of digits
|
-- numerals as sequences of digits
|
||||||
|
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ Available treebanks:
|
|||||||
rgl-exx.txt -- an old text treebank for RGL copied from GF/test/exx-resource.gfs
|
rgl-exx.txt -- an old text treebank for RGL copied from GF/test/exx-resource.gfs
|
||||||
rgl-api-trees.txt -- the examples of the library synopsis in core RGL terms
|
rgl-api-trees.txt -- the examples of the library synopsis in core RGL terms
|
||||||
ud-rgl-trees.txt -- trees constructed from Universal Dependencies documentation by using words from the common RGL Lexicon
|
ud-rgl-trees.txt -- trees constructed from Universal Dependencies documentation by using words from the common RGL Lexicon
|
||||||
|
numeral-trees.txt -- trees representing a variety of numerals
|
||||||
|
|
||||||
To run a treebank on a language:
|
To run a treebank on a language:
|
||||||
|
|
||||||
|
|||||||
10
treebanks/numeral-trees.txt
Normal file
10
treebanks/numeral-trees.txt
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
num (pot2as3 (pot2 pot01))
|
||||||
|
num (pot2as3 (pot2plus pot01 (pot0as1 pot01)))
|
||||||
|
num (pot2as3 (pot2 (pot0 n2)))
|
||||||
|
num (pot2as3 (pot2plus (pot0 n2) (pot0as1 pot01)))
|
||||||
|
num (pot3 (pot1as2 (pot0as1 pot01)))
|
||||||
|
num (pot3plus (pot1as2 (pot0as1 pot01)) (pot1as2 (pot0as1 pot01)))
|
||||||
|
num (pot3 (pot1as2 (pot0as1 (pot0 n2))))
|
||||||
|
num (pot3plus (pot1as2 (pot0as1 (pot0 n2))) (pot1as2 (pot0as1 pot01)))
|
||||||
|
num (pot3plus (pot1as2 (pot0as1 (pot0 n2))) (pot2 (pot0 n8)))
|
||||||
|
num (pot3plus (pot1as2 (pot0as1 (pot0 n2))) (pot2plus (pot0 n8) (pot1plus n3 (pot0 n2))))
|
||||||
Reference in New Issue
Block a user