From 167e80df973f57cee72c1bd4cec4dd43e6614b07 Mon Sep 17 00:00:00 2001 From: odanoburu Date: Wed, 9 Jan 2019 12:00:32 -0200 Subject: [PATCH 1/2] (Por) improve adjective smart paradigms - make it about guessing feminine form from the lemma (masculine form) - this way one can reuse the noun paradigm in the adjective paradigms, simplifying it and improving it at the same time - add cases for 'mente' obs: works but doesn't compile? --- src/portuguese/LexiconPor.gf | 4 +- src/portuguese/MorphoPor.gf | 93 +++++++++++----------------------- src/portuguese/NumeralPor.gf | 2 +- src/portuguese/ParadigmsPor.gf | 52 +++++++++++-------- 4 files changed, 64 insertions(+), 87 deletions(-) diff --git a/src/portuguese/LexiconPor.gf b/src/portuguese/LexiconPor.gf index 8ddc28cd..c1b8d7ee 100644 --- a/src/portuguese/LexiconPor.gf +++ b/src/portuguese/LexiconPor.gf @@ -10,7 +10,7 @@ flags lin easy_A2V = mkA2V (mkA "fácil") dative genitive ; married_A2 = mkA2 (mkA "casado") (mkPrep "com") ; - probable_AS = mkAS (prefA (mkA "provável" "provavelmente")) ; + probable_AS = mkAS (prefA (mkA "provável")) ; fun_AV = mkAV (mkA "divertido") genitive ; -- A bad_A = prefA (mkA (mkA "mau") (mkA "pior")) ; @@ -58,7 +58,7 @@ lin white_A = compADeg (mkA "branco") ; wide_A = mkA "largo" ; -- extenso yellow_A = mkA "amarelo" ; - young_A = prefA (mkA "jovem" "juvenilmente") ; + young_A = prefA (mkA "jovem") ; already_Adv = mkAdv "já" ; far_Adv = mkAdv "longe" ; ----? now_Adv = mkAdv "agora" ; diff --git a/src/portuguese/MorphoPor.gf b/src/portuguese/MorphoPor.gf index 312b80f7..4d39b406 100644 --- a/src/portuguese/MorphoPor.gf +++ b/src/portuguese/MorphoPor.gf @@ -157,75 +157,40 @@ oper } } ; - mkAdj2 : (_,_: Str) -> Adj ; - mkAdj2 aj av = let - adj = mkAdjReg aj - in { - s = table { - ASg g _ => adj.s ! ASg g APred ; - APl g => adj.s ! APl g ; - AA => av + mkAdj4 : (_,_,_,_ : Str) -> Adj ; + mkAdj4 ms fs mp fp = { + s = table { + ASg g _ => genForms ms fs ! g ; + APl g => genForms mp fp ! g ; + AA => case fs of { + exeg + v@("é"|"á"|"í"|"ó"|"ú"|"ê"|"ô") + tica + => exeg + (diacriticToVowel v) + tica + "mente" ; + + comu + "m" => comu + "mente" ; -- for Brazilian Portuguese + + _ => fs + "mente" } + } } ; --- Then the regular and invariant patterns. + mkAdjFromNouns : Noun -> Noun -> Adj ; + mkAdjFromNouns nm nf = mkAdj4 (nm.s ! Sg) (nf.s ! Sg) (nm.s ! Pl) (nf.s ! Pl) ; - adjPreto : Str -> Adj = \preto -> - let - pret = Predef.tk 1 preto - in - mkAdj preto (pret + "a") (pret + "os") (pret + "as") (pret + "amente") ; - - -- masculine and feminine are identical: - -- adjectives ending with -e, -a and many but not all that end in a - -- consonant - adjUtil : Str -> Str -> Adj = \útil,úteis -> - mkAdj útil útil úteis úteis (útil + "mente") ; - - -- adjectives that end in consonant but have different masc and fem - -- forms español, hablador ... - adjOuvidor : Str -> Str -> Adj = \ouvidor,ouvidora -> - mkAdj ouvidor ouvidora (ouvidor + "es") (ouvidor + "as") (ouvidora + "mente") ; - - adjBlu : Str -> Adj = \blu -> - mkAdj blu blu blu blu blu ; --- blasé - - -- francês francesa franceses francesas - adjFrances : Str -> Adj = \francês -> - let franc : Str = Predef.tk 2 francês ; - frances : Str = franc + "es" ; - in mkAdj francês (frances + "a") (frances + "es") (frances + "as") (frances + "amente") ; - - - -- alemão alemã alemães alemãs - -- is there really a need for this? is it as useful as the spanish - -- one? - adjVo : Str -> Adj = \alemão -> - let alemã : Str = init alemão ; - alem : Str = init alemã ; - ã : Str = last alemã ; - v : Str = case ã of { - "ã" => "a" - } ; - alemvo : Str = alem + v + "o" ; - in mkAdj alemão alemã (alemã + "s") (alemã + "es") (alemã + "mente") ; - - adjEuropeu : Str -> Adj = \europeu -> let europe = init europeu in - mkAdj europeu (europe + "ia") (europeu + "s") (europe + "ias") - (europe + "iamente") ; + mkAdjReg2 : Str -> Str -> Adj ; + mkAdjReg2 ms fs = mkAdjFromNouns (mkNomReg ms) (mkNomReg fs) ; + -- smart paradigm for adjectives amounts to guessing the feminine + -- form from the masculine form given, and then using the noun smart + -- paradigm for the plural forms mkAdjReg : Str -> Adj = \a -> - case a of { - pret + "o" => adjPreto a ; - anarquist + v@("e" | "a") => adjUtil a (a + "s") ; - ouvido + "r" => adjOuvidor a (ouvido + "ra") ; - chin + "ês" => adjFrances a ; - europ + "eu" => adjEuropeu a ; - alem + "ão" => adjVo a ; - provav + v@("e" | "i") + "l" => adjUtil a (provav + "eis") ; -- fails at pueril - nomina + "l" => adjUtil a (nomina + "is") ; - jove + "m" => adjUtil a (jove + "ns") ; - _ => adjUtil a (a + "s") + let mkAdj : Str -> Adj = mkAdjReg2 a ; + in case a of { + alem + "ão" => mkAdj (alem + "ã") ; -- fails for patrão/patroa + pret + "o" => mkAdj (pret + "a") ; + ouvido + "r" => mkAdj (ouvido + "ra") ; + chin + "ês" => mkAdj (chin + "esa") ; + europ + "eu" => mkAdj (europ + "eia") ; + _ => mkAdj a } ; --2 Personal pronouns @@ -314,4 +279,4 @@ oper n = number } ; -} +} ; diff --git a/src/portuguese/NumeralPor.gf b/src/portuguese/NumeralPor.gf index 0e606a39..5146a9f2 100644 --- a/src/portuguese/NumeralPor.gf +++ b/src/portuguese/NumeralPor.gf @@ -130,7 +130,7 @@ concrete NumeralPor of Numeral = CatPor [Numeral,Digits] ** regCard vigesimo = case vigesimo of { -- to handle milhão case (in ParseExtend module) milh + "ão" => \g, n -> genNumForms vigesimo vigesimo (milh + "ões") vigesimo ! g ! n; - _ => pronForms (adjPreto vigesimo) + _ => pronForms (mkAdjReg vigesimo) } ; spl : (CardOrd => Str) -> {s : CardOrd => Str ; n : Number} = \s -> { diff --git a/src/portuguese/ParadigmsPor.gf b/src/portuguese/ParadigmsPor.gf index d4b285be..9c39c5ac 100644 --- a/src/portuguese/ParadigmsPor.gf +++ b/src/portuguese/ParadigmsPor.gf @@ -195,8 +195,11 @@ oper regA : Str -> A ; regA a = liftAdj (mkAdjReg a) ; - mk2A : (único,unicamente : Str) -> A ; - mk2A adj adv = liftAdj (mkAdj2 adj adv) ; + mk2A : (patrão,patroa : Str) -> A ; + mk2A ms fs = liftAdj (mkAdjReg2 ms fs) ; + + mk4A : (bobão,bobona,bobões,bobonas : Str) -> A ; + mk4A a b c d = liftAdj (mkAdj4 a b c d) ; mk5A : (preto,preta,pretos,pretas,pretamente : Str) -> A ; mk5A a b c d e = liftAdj (mkAdj a b c d e) ; @@ -205,50 +208,59 @@ oper adjCopula a cop = a ** {copTyp = cop} ; mkADeg : A -> A -> A ; - mkADeg a b = lin A { + mkADeg a b = a ** { s = table { Posit => a.s ! Posit ; _ => b.s ! Posit -- Compar => b.s ! Posit ; -- Superl => "o" ++ b.s ! Posit ; - } ; - isPre = a.isPre ; - copTyp = a.copTyp + } } ; invarA : Str -> A ; - invarA a = liftAdj (adjBlu a) ; + invarA a = liftAdj (mkAdj4 a a a a) ; mkNonInflectA : A -> Str -> A ; - mkNonInflectA = \blanco,hueso -> blanco ** {s = \\x,y => blanco.s ! x ! y ++ hueso } ; + mkNonInflectA blanco hueso = blanco ** { + s = \\x,y => blanco.s ! x ! y ++ hueso + } ; mkA = overload { --- For regular adjectives, all forms are derived from the masculine --- singular. The types of adjectives that are recognized are "alto", --- "fuerte", "util". Comparison is formed by "mas". + -- For regular adjectives, all forms are derived from the + -- masculine singular. The types of adjectives that are recognized + -- are "alto", "fuerte", "util". Comparison is formed by "mas". mkA : (bobo : Str) -> A = regA ; -- predictable adjective --- Some adjectives need the feminine form separately. + -- Some adjectives need the feminine form separately. mkA : (espanhol,espanhola : Str) -> A = mk2A ; --- One-place adjectives compared with "mais" need five forms in the --- worst case (masc and fem singular, masc plural, adverbial). + -- Very rarely (if ever) does one need to specify the adverbial + -- form. + mkA : (burrão,burrona,burrões,burronas : Str) -> A + = mk4A ; + + -- One-place adjectives compared with "mais" need five forms in + -- the worst case (masc and fem singular, masc and fem plural, + -- adverbial). mkA : (bobo,boba,bobos,bobas,bobamente : Str) -> A = mk5A ; --- In the worst case, two separate adjectives are given: the positive --- ("bueno"), and the comparative ("mejor"). - -- special comparison with "mais" as default + -- In the worst case, two separate adjectives are given: the positive + -- ("bom"), and the comparative ("melhor"). special comparison with + -- "mais" as default mkA : (bom : A) -> (melhor : A) -> A = mkADeg ; - mkA : (blanco : A) -> (hueso : Str) -> A -- noninflecting component after the adjective + -- noninflecting component after the adjective + mkA : (blanco : A) -> (hueso : Str) -> A = mkNonInflectA ; - mkA : A -> CopulaType -> A -- force copula type + -- force copula type + mkA : A -> CopulaType -> A = adjCopula ; + } ; -- The functions above create postfix adjectives. To switch them to @@ -369,7 +381,7 @@ oper -- deviant past participle, e.g. abrir - aberto special_ppV ve pa = { s = table { - VPart g n => (adjPreto pa).s ! (genNum2Aform g n) ; + VPart g n => (mkAdjReg pa).s ! (genNum2Aform g n) ; p => ve.s ! p } ; lock_V = <> ; From 9376c108ac0b1c5d2631dcd35e4e9a0aa0879e01 Mon Sep 17 00:00:00 2001 From: odanoburu Date: Thu, 10 Jan 2019 09:53:47 -0200 Subject: [PATCH 2/2] (Por) rm (indirect) use of runtime gluing --- src/portuguese/ConstructionPor.gf | 5 ++++- src/portuguese/MorphoPor.gf | 28 +++++++++++++++------------- src/portuguese/ParadigmsPor.gf | 2 +- 3 files changed, 20 insertions(+), 15 deletions(-) diff --git a/src/portuguese/ConstructionPor.gf b/src/portuguese/ConstructionPor.gf index 585f3b1a..e946ad79 100644 --- a/src/portuguese/ConstructionPor.gf +++ b/src/portuguese/ConstructionPor.gf @@ -33,7 +33,10 @@ lin n_units_AP card cn a = mkAP (lin AdA (mkUtt (mkNP (lin CN cn)))) (lin A a) ; n_units_of_NP card cn np = mkNP card (mkCN (lin N2 cn) np) ; - n_unit_CN card cn cn = mkCN (invarA ("de" ++ card.s ! cn.g ++ cn.s ! card.n)) cn ; + n_unit_CN card cn cn = + let s : Str = "de" ++ card.s ! cn.g ++ cn.s ! card.n ; + adj : A = mkA s s s s s ; + in mkCN adj cn ; bottle_of_CN np = mkCN (lin N2 (mkN2 (mkN "garrafa" feminine) part_Prep)) np ; cup_of_CN np = mkCN (lin N2 (mkN2 (mkN "copo") part_Prep)) np ; diff --git a/src/portuguese/MorphoPor.gf b/src/portuguese/MorphoPor.gf index 4d39b406..ffb7f865 100644 --- a/src/portuguese/MorphoPor.gf +++ b/src/portuguese/MorphoPor.gf @@ -64,7 +64,7 @@ oper "i" => "í" ; "o" => "ó" ; "u" => "ú" ; - _ => error ("input '" ++ v ++ "' must be vowel character.") + _ => error ("input" ++ v ++ "must be vowel character.") } ; diacriticToVowel : Str -> Str = \v -> @@ -74,7 +74,7 @@ oper "í" => "i" ; ("ó"|"ô"|"õ") => "o" ; "ú" => "u" ; - _ => error ("input '" ++ v ++ "' must be a vowel character with an accent.") + _ => error ("input" ++ v ++ "must be a vowel character with an accent.") } ; -- Common nouns are inflected in number and have an inherent gender. @@ -130,7 +130,7 @@ oper home + "m" => mkNoun (nomNuvem vinho) Masc ; - g + v@("á"|"é"|"í"|"ó"|"ú"|"ê") + "s" => mkNoun (numForms vinho (g + diacriticToVowel v + "ses")) Masc ; + g + v@("á"|"é"|"í"|"ó"|"ú"|"ê") + "s" => mkNoun (numForms vinho (g + (diacriticToVowel v) + "ses")) Masc ; ônibu + "s" => mkNoun (nomAreia vinho) Masc ; @@ -158,19 +158,21 @@ oper } ; mkAdj4 : (_,_,_,_ : Str) -> Adj ; - mkAdj4 ms fs mp fp = { - s = table { - ASg g _ => genForms ms fs ! g ; - APl g => genForms mp fp ! g ; - AA => case fs of { - exeg + v@("é"|"á"|"í"|"ó"|"ú"|"ê"|"ô") + tica - => exeg + (diacriticToVowel v) + tica + "mente" ; + mkAdj4 ms fs mp fp = + let adv : Str = case fs of { + exeg + vo@("é"|"á"|"í"|"ó"|"ú"|"ê"|"ô") + tica + => exeg + (diacriticToVowel vo) + tica ; - comu + "m" => comu + "mente" ; -- for Brazilian Portuguese + comu + "m" => comu ; -- for Brazilian Portuguese - _ => fs + "mente" + _ => fs + } + "mente" ; + in { + s = table { + ASg g _ => genForms ms fs ! g ; + APl g => genForms mp fp ! g ; + AA => adv } - } } ; mkAdjFromNouns : Noun -> Noun -> Adj ; diff --git a/src/portuguese/ParadigmsPor.gf b/src/portuguese/ParadigmsPor.gf index 9c39c5ac..eead0820 100644 --- a/src/portuguese/ParadigmsPor.gf +++ b/src/portuguese/ParadigmsPor.gf @@ -245,7 +245,7 @@ oper -- One-place adjectives compared with "mais" need five forms in -- the worst case (masc and fem singular, masc and fem plural, -- adverbial). - mkA : (bobo,boba,bobos,bobas,bobamente : Str) -> A = mk5A ; + mkA : (gabarolas,gabarolas,gabarolas,gabarolas,gabarolamente : Str) -> A = mk5A ; -- In the worst case, two separate adjectives are given: the positive -- ("bom"), and the comparative ("melhor"). special comparison with