1
0
forked from GitHub/gf-core
Files
gf-core/lib/src/catalan/MorphoCat.gf
2014-11-05 17:49:27 +00:00

312 lines
11 KiB
Plaintext

--# -path=.:../romance:../common:../../prelude
--1 A Simple Catalan Resource Morphology
--
-- Aarne Ranta 2002 -- 2005
-- Jordi Saludes 2008: Derived from MorphoSpa.
-- Inari Listenmaa 2012: Added smart paradigms for adjectives.
--
-- This resource morphology contains definitions needed in the resource
-- syntax. To build a lexicon, it is better to use $ParadigmsCat$, which
-- gives a higher-level access to this module.
resource MorphoCat = CommonRomance, ResCat **
open PhonoCat, Prelude, Predef in {
flags optimize=all ; coding=utf8 ;
--2 Nouns
--
-- The following macro is useful for creating the forms of number-dependent
-- tables, such as common nouns.
-- gcc M2.3
oper
-- Build a number-indexed table from an explicit singular (first argument)
-- and an explicit plural (second argument).
numForms : (_,_ : Str) -> Number => Str = \sg, pl ->
  table {
    Sg => sg ;
    Pl => pl
  } ;
-- Plural formed by appending "s": cep, ceps.
nomCep : Str -> Number => Str = \sg ->
  let pl : Str = sg + "s"
  in numForms sg pl ;
-- The last two letters of the singular are dropped and replaced by "ques"
-- when they are "ca" (vaca, vaques) and by "gues" in every other case.
nomVaca : Str -> Number => Str = \sg ->
  let
    stem : Str = Predef.tk 2 sg ;
    plEnding : Str = case Predef.dp 2 sg of {
      "ca" => "ques" ;
      _ => "gues"
    }
  in
    numForms sg (stem + plEnding) ;
-- The plural replaces the final letter of the singular (second argument)
-- with the given ending (first argument): nomCasa "es" "casa" gives
-- casa, cases.
nomCasa : Str -> Str -> Number => Str = \ending, sg ->
  let stem : Str = init sg
  in numForms sg (stem + ending) ;
-- Plural formed by appending "ns": fre, frens.
nomFre : Str -> Number => Str = \sg ->
  let pl : Str = sg + "ns"
  in numForms sg pl ;
-- Plural formed by appending "os": cas, casos.
nomCas : Str -> Number => Str = \sg ->
  let pl : Str = sg + "os"
  in numForms sg pl ;
-- Two free plural variants, "-s" and "-os": test, tests / testos.
nomTest : Str -> Number => Str = \sg ->
  numForms sg
    (variants {
       sg + "s" ;
       sg + "os"
     }) ;
-- The last two letters of the singular are replaced by "ües":
-- llengua, llengües.
nomLlengua : Str -> Number => Str = \sg ->
  let stem = Predef.tk 2 sg
  in numForms sg (stem + "ües") ;
-- Two free plural variants: the last two letters replaced by "jos", or
-- plain "s" appended: faig, fajos / faigs.
nomFaig : Str -> Number => Str = \sg ->
  let stem = Predef.tk 2 sg
  in numForms sg
       (variants {
          stem + "jos" ;
          sg + "s"
        }) ;
-- Two free plural variants built on the singular minus its last letter:
-- desig, desitjos / desigs.
nomDesig : Str -> Number => Str = \sg ->
  let stem = Predef.tk 1 sg
  in numForms sg
       (variants {
          stem + "tjos" ;
          stem + "gs"
        }) ;
-- Invariable nouns: the plural is identical to the singular (temps, temps).
nomTemps : Str -> Number => Str = \form ->
  numForms form form ;
-- Common nouns are inflected in number and have an inherent gender.
-- This packs a ready-made number table and a gender into a noun record.
mkNoun : (Number => Str) -> Gender -> Noun = \forms, gender ->
  {
    s = forms ;
    g = gender
  } ;
-- Irregular nouns: singular and plural are both given explicitly,
-- followed by the gender.
mkNounIrreg : Str -> Str -> Gender -> Noun = \sg, pl, gender ->
  let forms : Number => Str = numForms sg pl
  in mkNoun forms gender ;
-- Regular nouns: the plural paradigm and the gender are guessed from the
-- ending of the singular. Branches are tried in order, so the more
-- specific endings ("ca", "ga", "gua") must stay above the plain "a".
-- NOTE(review): every word ending in unaccented "i" gets the "-ns" plural
-- of nomFre; confirm that this is intended for polysyllabic words.
mkNomReg : Str -> Noun = \w ->
  let
    -- apply a plural paradigm to the word and fix the gender
    masc : (Str -> Number => Str) -> Noun = \paradigm -> mkNoun (paradigm w) Masc ;
    fem : (Str -> Number => Str) -> Noun = \paradigm -> mkNoun (paradigm w) Fem
  in
    case w of {
      _ + ("ca"|"ga") => fem nomVaca ;
      _ + "gua" => fem nomLlengua ;
      _ + "a" => fem (nomCasa "es") ;
      _ + ("s"|"x"|"ç") => masc nomCas ;
      _ + "i" => masc nomFre ;
      _ + "í" => masc (nomCasa "ins") ;
      _ + "à" => masc (nomCasa "ans") ;
      _ + "ó" => masc (nomCasa "ons") ;
      _ + "ig" => masc nomFaig ;
      _ => masc nomCep
    } ;
--2 Adjectives
--
-- Adjectives are conveniently seen as gender-dependent nouns.
-- Here are some patterns. First one that describes the worst case.
-- gcc M2.1
-- Worst-case adjective constructor. The arguments are, in order:
-- masculine singular, feminine singular, masculine plural,
-- feminine plural, and the form stored under AA (the "-ment" form).
mkAdj : (_,_,_,_,_ : Str) -> Adj = \mascSg, femSg, mascPl, femPl, adv ->
  let
    mascForms : Number => Str = numForms mascSg mascPl ;
    femForms : Number => Str = numForms femSg femPl
  in {
    s = table {
      AF Masc n => mascForms ! n ;
      AF Fem n => femForms ! n ;
      AA => adv
    }
  } ;
--- Then the regular and invariant patterns.
-- Fully regular adjective; endings are appended to the masculine singular:
-- prim, prima, prims, primes, primament.
adjPrim : Str -> Adj = \mascSg ->
  let
    femSg : Str = mascSg + "a" ;
    mascPl : Str = mascSg + "s" ;
    femPl : Str = mascSg + "es" ;
    adv : Str = mascSg + "ament"
  in
    mkAdj mascSg femSg mascPl femPl adv ;
-- Masculine and feminine singular are both given: blau, blava.
-- Masculine plural is masculine + "s"; feminine plural replaces the last
-- letter of the feminine with "es"; the AA form is feminine + "ment".
adjBlau : Str -> Str -> Adj = \mascSg, femSg ->
  let
    femStem : Str = Predef.tk 1 femSg ;
    mascPl : Str = mascSg + "s" ;
    femPl : Str = femStem + "es" ;
    adv : Str = femSg + "ment"
  in
    mkAdj mascSg femSg mascPl femPl adv ;
-- The feminine replaces the final letter of the masculine with "a"
-- (fondo, fonda); the remaining forms come from adjBlau.
adjFondo : Str -> Adj = \mascSg ->
  let femSg : Str = Predef.tk 1 mascSg + "a"
  in adjBlau mascSg femSg ;
-- An "n" is inserted before every ending: bo, bona, bons, bones, bonament.
adjBo : Str -> Adj = \mascSg ->
  let nStem : Str = mascSg + "n"
  in mkAdj mascSg
       (nStem + "a")
       (nStem + "s")
       (nStem + "es")
       (nStem + "ament") ;
-- Adjectives with a single form for both genders in the singular:
-- fidel, fidel, fidels, fidels, fidelment.
-- After a final sibilant ("s", "ç", "x") the masculine plural takes "os".
-- Words in "ç" additionally have a distinct feminine plural in "ces"
-- (feliç, feliç, feliços, felices); previously the masculine plural was
-- reused there. For all other endings the two plurals coincide as before.
adjFidel : Str -> Adj = \fidel ->
  let
    mascPl : Str = case last fidel of {
      ("s"|"ç"|"x") => fidel + "os" ;   -- feliç, feliços ; capaç, capaços
      _ => fidel + "s"
    } ;
    femPl : Str = case last fidel of {
      "ç" => init fidel + "ces" ;       -- feliç, felices ; capaç, capaces
      _ => mascPl
    }
  in
    mkAdj fidel fidel mascPl femPl (fidel + "ment") ;
-- boig, boja, bojos, boges
-- lleig, lletja, lletjos, lletges
-- Only the feminine singular (second argument) drives the derived forms:
-- dropping its last letter gives the stem for "-a", "-os" and "-ament";
-- dropping one more letter gives the stem for the feminine plural "-ges".
adjIg : Str -> Str -> Adj = \mascSg, femSg ->
  let
    jStem : Str = tk 1 femSg ;   -- boj-, lletj-
    gStem : Str = tk 1 jStem     -- bo-, llet-
  in
    mkAdj mascSg
      (jStem + "a")
      (jStem + "os")
      (gStem + "ges")
      (jStem + "ament") ;
-- públic, pública, públics, públiques
-- llarg, llarga, llargs, llargues
-- Only the feminine plural changes spelling: final "c" becomes "qu" and
-- final "g" becomes "gu" before "es". The case below has branches for
-- "c" and "g" only, so the argument must end in one of the two.
adjXc : Str -> Adj = \mascSg ->
  let
    body : Str = init mascSg ;
    femPlStem : Str = case last mascSg of {
      "c" => body + "qu" ;
      "g" => body + "gu"    -- llarg, not boig
    }
  in
    mkAdj mascSg
      (mascSg + "a")
      (mascSg + "s")
      (femPlStem + "es")
      (mascSg + "ament") ;
-- Sibilant endings. Both singular forms are given; the plurals and the
-- AA form are built on the feminine minus its last letter:
-- famos, famosa give famosos, famoses, famosament.
adjXs : Str -> Str -> Adj = \mascSg, femSg ->
  let stem : Str = tk 1 femSg
  in mkAdj mascSg femSg
       (stem + "os")
       (stem + "es")
       (stem + "ament") ;
-- català, catalana, catalans, catalanes
-- The final vowel is passed through unaccent and an "n" is added; every
-- form except the masculine singular is built on that stem.
adjVn : Str -> Adj = \mascSg ->
  let
    plainVowel : Str = unaccent (last mascSg) ;
    nStem : Str = init mascSg + plainVowel + "n"
  in
    mkAdj mascSg
      (nStem + "a")
      (nStem + "s")
      (nStem + "es")
      (nStem + "ament") ;
-- casat, casada ; groc, groga
-- The final "t" or "c" of the masculine becomes "d" or "g" in the feminine
-- stem. When that stem ends in "g", the feminine plural uses "gu" before
-- "es" (grogues). The first case has branches for "t" and "c" only, so
-- the argument must end in one of the two.
adjCasat : Str -> Adj = \mascSg ->
  let
    body : Str = init mascSg ;
    voiced : Str = case last mascSg of {
      "t" => body + "d" ;
      "c" => body + "g"
    } ;
    femPlStem : Str = case last voiced of {
      "g" => body + "gu" ;
      _ => voiced
    }
  in
    mkAdj mascSg
      (voiced + "a")
      (mascSg + "s")
      (femPlStem + "es")
      (voiced + "ament") ;
-- francès, francesa, francesos, franceses
-- The second-to-last letter is passed through unaccent; every form except
-- the masculine singular is built on the resulting stem.
adjFrances : Str -> Adj = \mascSg ->
  let
    accented : Str = last (tk 1 mascSg) ;
    plainStem : Str = tk 2 mascSg + unaccent accented + "s"
  in
    mkAdj mascSg
      (plainStem + "a")
      (plainStem + "os")
      (plainStem + "es")
      (plainStem + "ament") ;
-- europeu, europea, europeus, europees
-- The final letter is dropped before the feminine and AA endings; the
-- masculine plural just appends "s".
adjEuropeu : Str -> Adj = \mascSg ->
  let stem : Str = tk 1 mascSg
  in mkAdj mascSg
       (stem + "a")
       (mascSg + "s")
       (stem + "es")
       (stem + "ament") ;
-- belga, belga, belgues, belgues
-- asteca, asteca, asteques, asteques
-- Adjectives in "-a" with one form per number for both genders. The
-- plural replaces the final "a" with "es"; before that "e" a stem-final
-- "g" is spelled "gu" and a stem-final "c" is spelled "qu" (the same
-- respelling nomVaca applies to nouns). Previously "c" received a bare
-- "u", yielding forms such as "astecues".
adjBelga : Str -> Adj = \belga ->
  let
    belg : Str = init belga ;
    pluralStem : Str = case last belg of {
      "g" => belg + "u" ;         -- belg-  becomes belgu-
      "c" => init belg + "qu" ;   -- astec- becomes astequ-
      _ => belg
    } ;
    belgues : Str = pluralStem + "es"
  in
    mkAdj belga belga belgues belgues (belga + "ment") ;
-- One-argument smart paradigm: the adjective class is chosen from the
-- ending of the masculine singular. Branches are tried top to bottom, so
-- the more specific endings must stay above the more general ones.
mkAdjReg : Str -> Adj = \adj ->
  case adj of {
    _ + ("ll"|"rn") => adjPrim adj ;       -- vell~vella ; modern~moderna
    _ + ("l"|"n"|"ç") => adjFidel adj ;    -- local; gran; capaç. For espanyol~espanyola mk2A.
    _ + "a" => adjBelga adj ;              -- invariable, -es in plural
    _ + "eu" => adjFidel adj ;             -- greu; breu. most "eu" are invariable, europeu and jueu with mk2A.
    _ + ("au"|"ou"|"iu") =>                -- blau; nou; viu
      adjBlau adj ((tk 1 adj) + "va") ;
    _ + ("e"|"o") => adjFondo adj ;
    _ + "ig" =>                            -- boig~boja. lleig~lletja with mk2A.
      adjIg adj ((tk 2 adj) + "ja") ;
    _ + ("c"|"g") => adjXc adj ;           -- públic; llarg. cec~cega with mk2A
    _ + ("n"|"l"|"r"|"s") + "t" => adjPrim adj ;   -- mort,llest,distint
    _ + "t" => adjCasat adj ;              -- tancat~tancada. petit~petita with mk2A.
    _ + ("à"|"é"|"è"|"í"|"ó"|"ò"|"ú") => adjVn adj ;   -- comú~comuna
    _ + ("à"|"é"|"è"|"í"|"ó"|"ò"|"ú") + "s" => adjFrances adj ;
    _ + ("s"|"x") => adjXs adj (adj + "a") ;   -- divers~diversa
    _ => adjPrim adj
  } ;
--Used for the following:
--diferent diferent : doesn't end in l/n/ç/eu but has invariant feminine
--petit petita petits petites : voiceless plosive in the stem.
--ridícul ridícula : ends in l/n/ç but is not invariant.
--lleig lletja : the geminated variant of boig boja
--bo bona ; pla plana : like adjVn, but for one syllable words
--diari diària ; ingenu ingènua : in feminine, stress in antepenultimate
--jueu jueva ; europeu europea : exceptional paradigms for "eu" ending
--rus russa : voiceless s in the stem
--groc groga : voiced g in the stem
--
-- Two-argument smart paradigm: masculine and feminine singular are given
-- and the pair is matched against the patterns below, top to bottom.
-- Pairs that match none of them fall back to mkAdjReg on the masculine.
mkAdj2Reg : Str -> Str -> Adj = \masc, fem ->
  case <masc, fem> of {
    -- feminine doesn't end in "a"
    <_, _ + ("b"|"c"|"d"|"e"|"f"|"g"|"h"|"i"|"j"|"k"|"l"|"m"|"n"|"o"|"p"|"q"|"r"|"s"|"t"|"u"|"v"|"x"|"y"|"z")> =>
      adjFidel masc ;
    -- 1) petit~petita 2) ridícul~ridícula, dolç~dolça
    <_ + ("t"|"l"|"ç"), _ + "a"> => adjPrim masc ;
    <_ + "ig", _> => adjIg masc fem ;             -- lleig~lletja
    <_, _ + "na"> => adjVn masc ;                 -- pla~plana
    <_, _ + ("à"|"é"|"è"|"í"|"ó"|"ò"|"ú") + _> => -- diari~diària
      adjBlau masc fem ;
    <_ + "u", _ + "va"> => adjBlau masc fem ;     -- jueu~jueva
    <_ + "eu", _ + "ea"> => adjEuropeu masc ;     -- europeu~europea
    <_ + "s", _> => adjXs masc fem ;              -- rus~russa
    <_ + "c", _ + "ga"> => adjCasat masc ;        -- groc~groga
    _ => mkAdjReg masc
  } ;
-- Map an accented vowel to its plain counterpart; any other string is
-- returned unchanged.
oper unaccent : Str -> Str = \v ->
  case v of {
    "à" => "a" ;
    ("è"|"é") => "e" ;
    "í" => "i" ;
    ("ò"|"ó") => "o" ;
    "ú" => "u" ;
    _ => v
  } ;
--2 Personal pronouns
--
-- All the eight personal pronouns can be built by the following macro.
-- The use of "en" as atonic genitive is debatable.
-- Arguments, in order:
--   1. the form stored in comp for Nom
--   2. the form stored in c1 for Acc
--   3. the form stored in c2 for CPrep P_a
--   4. the tonic form (ton), also used after prepCase for the other cases
--   5-8. possessives: masc sg, fem sg, masc pl, fem pl
--   then gender, number and person for the agreement features.
mkPronoun : (_,_,_,_,_,_,_,_ : Str) ->
  Gender -> Number -> Person -> Pronoun =
  \subj, cl1, cl2, tonic, possMascSg, possFemSg, possMascPl, possFemPl, gen, num, pers ->
  let
    -- tonic form preceded by whatever prepCase yields for the case
    withPrep : Case -> Str = \cas -> prepCase cas ++ tonic
  in {
    s = table {
      Nom => {c1 = [] ; c2 = [] ; comp = subj ; ton = tonic} ;
      Acc => {c1 = cl1 ; c2 = [] ; comp = [] ; ton = tonic} ;
      CPrep P_a => {c1 = [] ; c2 = cl2 ; comp = [] ; ton = withPrep (CPrep P_a)} ;
      other => {c1 = [] ; c2 = [] ; comp = withPrep other ; ton = withPrep other}
    } ;
    poss = table {
      Sg => table {
        Masc => possMascSg ;
        Fem => possFemSg
      } ;
      Pl => table {
        Masc => possMascPl ;
        Fem => possFemPl
      }
    } ;
    a = Ag gen num pers ;
    hasClit = True ;
    isPol = False
  } ;
-- Context-dependent possessive ending: stem + "on" in the contexts
-- matched by vocal, stem + "a" otherwise.
-- NOTE(review): vocal is not defined in this file; presumably it is the
-- vowel pattern from PhonoCat. Confirm.
elisPoss : Str -> Str = \stem ->
  let
    beforeVocal : Str = stem + "on" ;
    elsewhere : Str = stem + "a"
  in
    pre {
      vocal => beforeVocal ;
      _ => elsewhere
    } ;
--2 Determiners
--
-- Determiners, traditionally called indefinite pronouns, are inflected
-- in gender and number, like adjectives.
-- Select the form of an adjective-like determiner for a given gender and
-- number.
pronForms : Adj -> Gender -> Number -> Str = \det, gen, num ->
  det.s ! (AF gen num) ;
}