forked from GitHub/gf-rgl
Merge pull request #16 from odanoburu/por-num
add small treebank for numerals and improve NumeralPor
This commit is contained in:
@@ -8,8 +8,7 @@
|
||||
-- $ParadigmsPor$, which gives a higher-level access to this module.
|
||||
|
||||
resource MorphoPor = CommonRomance, ResPor **
|
||||
open PhonoPor, Prelude, Predef,
|
||||
CatPor in {
|
||||
open PhonoPor, Prelude, Predef, CatPor in {
|
||||
|
||||
flags optimize=all ;
|
||||
coding=utf8 ;
|
||||
|
||||
@@ -1,9 +1,14 @@
|
||||
concrete NumeralPor of Numeral = CatPor [Numeral,Digits] **
|
||||
open CommonRomance, ResRomance, MorphoPor, Prelude in {
|
||||
open CommonRomance, ResRomance, MorphoPor, Prelude, Predef in {
|
||||
|
||||
flags coding=utf8 ;
|
||||
|
||||
param
|
||||
DForm = unit | teen | ten | hundred ;
|
||||
|
||||
lincat
|
||||
--- cardinals are generally not inflected for gender; however, 1 and 2
|
||||
--- are, as are the hundreds from 200 to 900
|
||||
Digit = {s : DForm => CardOrd => Str} ;
|
||||
Sub10 = {s : DForm => CardOrd => Str ; n : Number} ;
|
||||
Sub100 = {s : CardOrd => Str ; n : Number} ;
|
||||
@@ -44,42 +49,67 @@ concrete NumeralPor of Numeral = CatPor [Numeral,Digits] **
|
||||
"novecentos" "nono" "nonagésimo" "noningentésimo";
|
||||
|
||||
pot01 =
|
||||
let um = (mkTal "um" "onze" "dez" "cento" "primeiro"
|
||||
let um = (mkTal "um" "onze" "dez" "centos" "primeiro"
|
||||
"décimo" "centésimo").s in
|
||||
{s =\\f,g => case <f,g> of {
|
||||
<unit, NCard Fem> => "uma" ;
|
||||
<hundred, NCard _> => "cento" ;
|
||||
_ => um ! f ! g
|
||||
} ;
|
||||
n = Sg
|
||||
} ;
|
||||
|
||||
pot0 d = {s = d.s ; n = Pl} ;
|
||||
|
||||
pot110 = spl (pot01.s ! ten) ;
|
||||
|
||||
pot111 = spl (pot01.s ! teen) ;
|
||||
|
||||
pot1to19 d = spl (d.s ! teen) ;
|
||||
|
||||
pot0as1 n = {s = n.s ! unit ; n = n.n} ;
|
||||
|
||||
pot1 d = spl (d.s ! ten) ;
|
||||
|
||||
pot1plus d e =
|
||||
{s = \\g => d.s ! ten ! g
|
||||
++ e_CardOrd g ++ e.s ! unit ! g ;
|
||||
n = Pl} ;
|
||||
|
||||
pot1as2 n = n ;
|
||||
|
||||
pot2 d =
|
||||
let n = case d.n of {
|
||||
Sg => mkTal [] [] [] "cem" [] [] "centésimo" ;
|
||||
_ => d
|
||||
Sg => mkNumStr "cem" "centésimo" ;
|
||||
_ => d.s ! hundred
|
||||
}
|
||||
in spl (n.s ! hundred) ;
|
||||
in spl n ;
|
||||
|
||||
pot2plus d e =
|
||||
{s = \\g => d.s ! hundred ! g
|
||||
++ e_CardOrd g ++ e.s ! g ;
|
||||
n = Pl} ;
|
||||
|
||||
pot2as3 n = n ;
|
||||
pot3 n = spl (\\g => n.s ! NCard Masc ++ mil g) ;
|
||||
pot3plus n m = {s = \\g => n.s ! NCard Masc
|
||||
++ mil g ++ e_CardOrd g
|
||||
++ m.s ! g ;
|
||||
n = Pl} ;
|
||||
|
||||
pot3 n =
|
||||
let n = case n.n of {
|
||||
Sg => [] ;
|
||||
_ => n.s ! NCard Masc
|
||||
} ;
|
||||
in spl (\\co => n ++ mil ! co) ;
|
||||
|
||||
pot3plus n m =
|
||||
let n = case n.n of {
|
||||
Sg => [] ;
|
||||
_ => n.s ! NCard Masc
|
||||
} ;
|
||||
in {s = \\co => n ++ mil ! co
|
||||
-- actually, 'e' should appear only if m is an exact hundred (pot2) or
|
||||
-- lower
|
||||
++ e_CardOrd co
|
||||
++ m.s ! co ;
|
||||
n = Pl} ;
|
||||
|
||||
oper
|
||||
mkTal : (_,_,_,_,_,_,_ : Str) -> {s : DForm => CardOrd => Str} =
|
||||
@@ -88,33 +118,41 @@ concrete NumeralPor of Numeral = CatPor [Numeral,Digits] **
|
||||
<unit, NCard _> => dois ;
|
||||
<teen, NCard _> => doze ;
|
||||
<ten, NCard _> => vinte ;
|
||||
<hundred, NCard _> => duzentos ;
|
||||
<unit, NOrd g n> => (regCard segundo) g n ;
|
||||
<hundred, NCard g> => regCard (tk 1 duzentos) g Pl ;
|
||||
<unit, NOrd g n> => regCard segundo g n ;
|
||||
<teen, NOrd g n> => (regCard "décimo") g n ++ (regCard segundo) g n ;
|
||||
<ten, NOrd g n> => regCard vigesimo g n ;
|
||||
<hundred, NOrd g n> => regCard duocentesimo g n
|
||||
}
|
||||
} ;
|
||||
|
||||
regCard : Str -> Gender -> Number -> Str = \vigesimo ->
|
||||
pronForms (adjPreto vigesimo) ;
|
||||
regCard : Str -> Gender -> Number -> Str ;
|
||||
regCard vigesimo = case vigesimo of {
|
||||
-- to handle words ending in "ão", such as "milhão" (in the ParseExtend module)
|
||||
milh + "ão" => \g, n -> genNumForms vigesimo vigesimo (milh + "ões") vigesimo ! g ! n;
|
||||
_ => pronForms (adjPreto vigesimo)
|
||||
} ;
|
||||
|
||||
spl : (CardOrd => Str) -> {s : CardOrd => Str ; n : Number} = \s -> {
|
||||
s = s ;
|
||||
n = Pl
|
||||
} ;
|
||||
|
||||
mil : CardOrd -> Str = \g ->
|
||||
(mkTal "mil" [] [] [] "milésimo" [] []).s ! unit ! g ;
|
||||
mkNumStr : Str -> Str -> CardOrd => Str ;
|
||||
mkNumStr cem centesimo = \\co =>
|
||||
case co of {
|
||||
NCard _ => cem ;
|
||||
NOrd g n => regCard centesimo g n
|
||||
} ;
|
||||
|
||||
mil : CardOrd => Str ;
|
||||
mil = mkNumStr "mil" "milésimo" ;
|
||||
|
||||
e_CardOrd : CardOrd -> Str = \co -> case co of {
|
||||
NCard _ => "e" ;
|
||||
_ => []
|
||||
} ;
|
||||
|
||||
param
|
||||
DForm = unit | teen | ten | hundred ;
|
||||
|
||||
---
|
||||
-- numerals as sequences of digits
|
||||
|
||||
|
||||
@@ -7,6 +7,7 @@ Available treebanks:
|
||||
rgl-exx.txt -- an old text treebank for RGL copied from GF/test/exx-resource.gfs
|
||||
rgl-api-trees.txt -- the examples of the library synopsis in core RGL terms
|
||||
ud-rgl-trees.txt -- trees constructed from Universal Dependencies documentation by using words from the common RGL Lexicon
|
||||
numeral-trees.txt -- trees representing a variety of numerals
|
||||
|
||||
To run a treebank on a language:
|
||||
|
||||
|
||||
10
treebanks/numeral-trees.txt
Normal file
10
treebanks/numeral-trees.txt
Normal file
@@ -0,0 +1,10 @@
|
||||
num (pot2as3 (pot2 pot01))
|
||||
num (pot2as3 (pot2plus pot01 (pot0as1 pot01)))
|
||||
num (pot2as3 (pot2 (pot0 n2)))
|
||||
num (pot2as3 (pot2plus (pot0 n2) (pot0as1 pot01)))
|
||||
num (pot3 (pot1as2 (pot0as1 pot01)))
|
||||
num (pot3plus (pot1as2 (pot0as1 pot01)) (pot1as2 (pot0as1 pot01)))
|
||||
num (pot3 (pot1as2 (pot0as1 (pot0 n2))))
|
||||
num (pot3plus (pot1as2 (pot0as1 (pot0 n2))) (pot1as2 (pot0as1 pot01)))
|
||||
num (pot3plus (pot1as2 (pot0as1 (pot0 n2))) (pot2 (pot0 n8)))
|
||||
num (pot3plus (pot1as2 (pot0as1 (pot0 n2))) (pot2plus (pot0 n8) (pot1plus n3 (pot0 n2))))
|
||||
Reference in New Issue
Block a user