From b2e2631269dcf770f46971e113ce2eba7e1365a2 Mon Sep 17 00:00:00 2001 From: David Bamutura Date: Sat, 24 Oct 2020 03:05:05 +0300 Subject: [PATCH] All structural words and most of the linearization functions for LexiconCgg have been provided --- src/rukiga/CatCgg.gf | 6 +- src/rukiga/LexiconCgg.gf | 12 ++-- src/rukiga/ParadigmsCgg.gf | 51 ++++++++++++++++- src/rukiga/ResCgg.gf | 11 ++-- src/rukiga/StructuralCgg.gf | 107 ++++++++++++++++++++++++++++++------ 5 files changed, 156 insertions(+), 31 deletions(-) diff --git a/src/rukiga/CatCgg.gf b/src/rukiga/CatCgg.gf index 1d19e003..e46cf623 100755 --- a/src/rukiga/CatCgg.gf +++ b/src/rukiga/CatCgg.gf @@ -92,7 +92,7 @@ lincat Digits = {s : Res.CardOrd => Res.Agreement=>Str ; n : Res.Number ; tail : Px.DTail} ; Ord = {s :Res.Agreement=>Str; position:Res.Position} ; Card = {s :Res.Agreement=>Str; n : Res.Number} ; - + A2 = Res.Adjective ** { c2 : Str ; isPre : Bool} ; DAP = Res.Determiner ; N2 = Res.Noun ** {c2 : Res.Agreement =>Str}; -- relational noun e.g. "son" Prep = Res.Preposition; -- preposition, or just case e.g. 
"in" @@ -106,6 +106,10 @@ linref VP =\vp -> vp.adv ++ vp.s ++ BIND ++ vp.pres ++ vp.comp ++vp.comp2 ++ vp.ap; VPSlash =\vpslash -> vpslash.s ++ BIND ++ vpslash.pres; +lindef + A2 = \s -> {s = s; position = Res.Post; isProper = False; + isPrep = False; isNeg = False;c2 = ""; isPre = True}; + --1 Cat: the Category System diff --git a/src/rukiga/LexiconCgg.gf b/src/rukiga/LexiconCgg.gf index 6da5b2fe..08b1f2c1 100755 --- a/src/rukiga/LexiconCgg.gf +++ b/src/rukiga/LexiconCgg.gf @@ -218,9 +218,9 @@ lin dust_N = mkN "omucuucu" "omucuucu" ZERO_ZERO; ear_N = mkN "okutu" "amatu" KU_MA; earth_N = mkN "ensi" "ensi" N_N; - -- easy_A2V + easy_A2V = mkA2V "yaguhi" Post False False False; egg_N = mkN "eihuri" "amahuri" I_MA; - -- empty_A + empty_A = mkAdjective "rimu busha" Post False True False; enemy_N = mkN "TODO : cofirm omurabe" "TODO : cofirm abarabe" MU_BA; factory_N = mkN "TODO : cofirm fakatore" "TODO : cofirm fakatore" N_N; fall_V = mkV "gw" "a" "ire"; @@ -240,9 +240,9 @@ lin fridge_N = mkN "firigi" "firigi" ZERO_ZERO; fruit_N = mkN "ekijuma" "ebijuma" KI_BI; full_A = mkAdjective "injwire" Post False True False; - -- fun_AV + --fun_AV = mkAdjective garden_N = mkN "omusiri" "emisiri" MU_MI; - --glove_N + glove_N = mkN "gilavu" "gilavu" ZERO_ZERO; gold_N = mkN "TODO:feza" "TODO:feza" ZERO_ZERO; grass_N = mkN "akanyaasi" "obunyaasi" KA_BU; guts_N = mkN "orubondo" "amabondo" RU_MA; @@ -272,7 +272,7 @@ lin learn_V2 = mkV2 "yeg" "a" "ire"; leather_N = mkN "oruhu" "empu" RU_N; --I think plural should be oruhu again leave_V2 = mkV2 "rug" "a" "ire"; - -- left_Ord + left_Ord = mkOrd "bumosho"; leg_N = mkN "okuguru" "amaguru" KU_MA; lie_V = mkV "beih" "a" "ire"; like_V2 = mkV2 "kun" "da" "zire"; @@ -311,7 +311,7 @@ lin -- rain_V0 religion_N = mkN "endiini" "endiini" N_N; restaurant_N = mkN "hooteeri" "hooteeri" ZERO_ZERO; - -- right_Ord + right_Ord = mkOrd "buryo"; road_N = mkN "orugundo" "engundo" RU_N; -- rock_N --roof_N = mkN "" diff --git a/src/rukiga/ParadigmsCgg.gf 
b/src/rukiga/ParadigmsCgg.gf index d007baa6..a44f0aaf 100755 --- a/src/rukiga/ParadigmsCgg.gf +++ b/src/rukiga/ParadigmsCgg.gf @@ -207,8 +207,57 @@ mkInterj : Str -> Interj -- Rearrange this document in future so that a paradigms file is -- as should be i.e with an abstract part and a a part with -- definitions - + mkOrd : Str -> Ord = \s -> lin Ord { s = \\_=>s; position = Post}; V0 : Type = V ; AS, A2S, AV : Type = A ; A2V : Type = A2 ; + mkV0 : V -> V; + mkV0 v = v ; + mkA2 : Str -> Position -> Bool -> Bool ->Bool-> A2 = \a2, pos, isProper, isPrep,isNeg -> + lin A2 ((mkAdjective a2 pos isProper isPrep isNeg) ** {c2 = ""; isPre = True}); + --mkA2V : A -> A2V; + --mkA2V a = lin A2V (a * {c2 = ""; isPre = True}); + mkA2V : Str -> Position -> Bool -> Bool ->Bool-> A2V =\a2, pos, isProper, isPrep,isNeg -> lin A2V ((mkAdjective a2 pos isProper isPrep isNeg) ** {c2 = ""; isPre = True}); + + + -- Adverbs modifying numerals + + mkAdN : Str -> AdN ; -- e.g. approximately + mkCAdv : Str -> CAdv ; + mkAdN x = lin AdN (ss x) ; + mkCAdv x = lin CAdv (ss x ** {p = []}) ; + + mkConj : overload { + mkConj : Str -> Conj ; -- and (plural agreement) --% + mkConj : Str -> Number -> Conj ; -- or (agreement number given as argument) --% + mkConj : Str -> Str -> Conj ; -- both ... and (plural) --% + mkConj : Str -> Str -> Number -> Conj ; -- either ... or (agreement number given as argument) --% + } ; + + mkConj = overload { + mkConj : Str -> Conj = \y -> mk2Conj [] y Pl ; -- when you have simply and + mkConj : Str -> Number -> Conj = \y,n -> mk2Conj [] y n ; + mkConj : Str -> Str -> Conj = \x,y -> mk2Conj x y Pl ; -- when you have both ... and ... + mkConj : Str -> Str -> Number -> Conj = mk2Conj ; + } ; + + mk2Conj : Str -> Str -> Number -> Conj = \x,y,n -> + lin Conj {s = \\_=>x; s2 = y; n = n}; + + --2 Adverbs + + -- Adverbs are not inflected. Most lexical ones have position + -- after the verb. Some can be preverbal (e.g. "always"). + + mkAdv : Str -> AgrExist -> Adv ; -- e.g. 
today + + --mkAdV : Str -> AdV ; -- e.g. always + + -- Adverbs modifying adjectives and sentences can also be formed. + + mkAdA : Str -> Position -> AdA ; -- e.g. quite + --mkCAdv : Str -> Str -> Str -> CAdv ; -- more than/no more than + + mkAdv x agrEx = lin Adv {s = x ; agr = agrEx } ; + mkAdA x pos = lin AdA {s = x ; position = pos } ; -- e.g. quite } diff --git a/src/rukiga/ResCgg.gf b/src/rukiga/ResCgg.gf index 89253088..a6405a02 100755 --- a/src/rukiga/ResCgg.gf +++ b/src/rukiga/ResCgg.gf @@ -563,8 +563,8 @@ mkSubjPrefix : Agreement -> Str =\a ->case a of { _ => mkClitic "-" -- Hopefully exhausted all forms }; - Adverb = {s :Str; agr : AgrExist} ; - mkAdv: Str -> AgrExist -> Adverb =\str, agr ->{s=str; agr=agr}; + Adverb : Type = {s :Str; agr : AgrExist} ; + --dealing with the adjective {- The Adjective can be before the noun for TRUE or @@ -1277,12 +1277,13 @@ mkSubjPrefix : Agreement -> Str =\a ->case a of { -- Structural -- prepositions sometimes have two kinds, near or far i.e omu or omuri - -- We ignore the distal dexis to be entered as a separate lemma - -- Instead str for positional arguments 1 & 2 cater for prepositions that inflect with Number - -- while str for positional argument 3 caters for those that do not inflect with Number + -- We provide for two kinds: near and distal plus a status checker for + -- genitive prepositions + Preposition : Type = {s : Str; other : Str; isGenPrep : Bool}; NounPhrase : Type = {s :Case => Str; agr : Agreement}; + {- Operation to create Noun Phrases from a Determiner and Nouns. 
In Runyankore and Rukiga, depending on the particular Determiner, diff --git a/src/rukiga/StructuralCgg.gf b/src/rukiga/StructuralCgg.gf index df82f809..1ae12767 100755 --- a/src/rukiga/StructuralCgg.gf +++ b/src/rukiga/StructuralCgg.gf @@ -6,7 +6,7 @@ concrete StructuralCgg of Structural = CatCgg ** {-variants NOTE: Please add them to the abstract syntax, ask aarne - or creat you own abstract Lexicon which inherits from the + or create your own abstract Lexicon which inherits from the standard one. See how english does it. i.e. use DictCggAbs.gf for the funs. and DictCgg.gf for the lins. @@ -15,14 +15,15 @@ concrete StructuralCgg of Structural = CatCgg ** lin --Determiner : Type = {s : Str ; s2: Agreement=>Str; ntype : NounState ; num : Number ; pos : Position; doesAgree: Bool }; - a_Det = {s =[] ; s2 = \\_ => []; ntype = Complete; num = Sg; pos = Pre; doesAgree = False; numeralS=\\_=>[]; numeralExists = False}; --: Det ; indefinite singular ---s - aPl_Det = {s =[]; s2= \\_ => []; ntype = Complete; num = Pl; pos = Pre; doesAgree = False; numeralS=\\_=>[]; numeralExists = False}; -- : Det ;indefinite plural ---s - the_Det = {s =[]; s2= \\_ => []; ntype = Complete; num = Sg; pos = Pre; doesAgree = False; numeralS=\\_=>[]; numeralExists = False}; --: Det ; -- definite singular ---s thePl_Det = {s =[]; ntype = Complete; num = Pl; pos = PreDeterminer}; --: Det ;definite plural ---s - + --a_Det = {s =[] ; s2 = \\_ => []; ntype = Complete; num = Sg; pos = Pre; doesAgree = False; numeralS=\\_=>[]; numeralExists = False}; --: Det ; indefinite singular ---s + --aPl_Det = {s =[]; s2= \\_ => []; ntype = Complete; num = Pl; pos = Pre; doesAgree = False; numeralS=\\_=>[]; numeralExists = False}; -- : Det ;indefinite plural ---s + --the_Det = {s =[]; s2= \\_ => []; ntype = Complete; num = Sg; pos = Pre; doesAgree = False; numeralS=\\_=>[]; numeralExists = False}; --: Det ; -- definite singular ---s thePl_Det = {s =[]; ntype = Complete; num = Pl; pos = PreDeterminer}; --: Det 
;definite plural ---s + -- 1 Determiners every_Det = {s ="buri"; s2 = \\_ => []; ntype=Incomplete; num=Sg; pos=Pre; doesAgree = False; numeralS=\\_=>[]; numeralExists = False} ; few_Det = {s="kye"; s2 = \\_ => []; ntype =Complete; num=Pl; pos=Post; doesAgree = False; numeralS=\\_=>[]; numeralExists = False} ; many_Det ={s="ingi"; s2 = \\_ => []; ntype =Complete; num=Pl; pos=Post; doesAgree = False; numeralS=\\_=>[]; numeralExists = False} ; + -- 2 Pronouns i_Pron = {s = table{Gen => glueGen (AgMUBAP1 Sg); _=> mkSStand (AgMUBAP1 Sg)}; third = \\_,_=>[]; agr = AgrYes (AgMUBAP1 Sg)};--mkPron "nyowe" "nyowe" (AgMUBAP1 Sg); youSg_Pron = {s = table{Gen => glueGen (AgMUBAP2 Sg); _=>mkSStand (AgMUBAP2 Sg)}; third = \\_,_=>[]; agr = AgrYes(AgMUBAP2 Sg)};--mkPron "iwe" "we" (AgMUBAP2 Sg); he_Pron, she_Pron = {s = table{Gen => glueGen (AgP3 Sg MU_BA); _=>mkSStand (AgP3 Sg MU_BA)}; third = \\_,_=>[]; agr = AgrYes(AgP3 Sg MU_BA)};--mkPron "uwe" "uwe" (AgP3 Sg MU_BA); @@ -35,7 +36,9 @@ lin third = \\agr => table{Gen =>glueGen agr; _ => mkSStand agr}; agr = AgrNo }; --mkPron "kyo" "kyo" (AgP3 Sg KI_BI); -- should form an it_Pron_NClass in extra module - + youPol_Pron = {s = table{Gen => glueGen (AgMUBAP2 Pl); _=>mkSStand (AgMUBAP2 Pl)}; third = \\_,_=>[]; agr =AgrYes (AgMUBAP2 Pl)};--mkPron "imwe" "imwe" (AgMUBAP2 Pl); + -- 3 Prepositions + above_Prep = mkPrep "ahinguru ya" [] False; behind_Prep = mkPrep "enyuma ya" [] False; between_Prep =mkPrep "hagati ya" [] False; to_Prep = mkPrep "aha" [] False; @@ -45,16 +48,33 @@ lin on_Prep = mkPrep "aha" "ahari" False; in8front_Prep = mkPrep "enyuma ya" [] False; --: Prep ; -- in front of + by8agent_Prep = mkPrep "by8agent_Prep" [] False ; -- when you meet by, use the passive of the verb + by8means_Prep = mkPrep "by8means_Prep" [] False; + during_Prep = mkPrep "omu" "omuri" False; + except_Prep = mkPrep "kwihaho" [] False; + for_Prep = mkPrep "for_Prep: would need paraphrasing" [] False; + + + {- + the word before is realised as a phrase 
in RR. + We use the negative version of the verb + used in the main clause. + it is always paraphrased. Let us put a hint for post processing + -} + before_Prep = mkPrep "PostProcess_before_proximal" "PostProcess_before_distal" False; ---na --please this string varies with vowels use combine_morphemes or ---combine_words when using it. + --na --please this string varies with vowels use combine_morphemes or + --combine_words when using it. with_Prep = mkPrep "na" [] False; from_Prep =mkPrep "kurunga" "" False; under_Prep = mkPrep "hansi ya" "" False; after_Prep = mkPrep "omu maisho" "" False; --: Prep ; - - ---Structural + part_Prep = mkPrep "part_Prep=of:Disambiguate" [] False; -- suspect "part of" + possess_Prep = mkPrep [] [] True; -- of is a huge table we should not be carrying around + through_Prep = mkPrep "raba omu" [] False; + without_Prep = mkPrep "tiine" [] False; + -- 4 Conjunctions {- --there are several and i.e. -- na (two nouns, 2 Noun Phrases, 2 Pronouns, 2 relative subject clauses, ) @@ -97,6 +117,10 @@ lin s2 =[]; n = Sg }; + if_then_Conj = mkConj "kuri" [] Sg; + -- Distributed Conjunction + both7and_DConj = mkConj "mbi" "na"; + either7or_DConj = mkConj "mwe ahari" "nari" Sg; have_V2 ={s= "in"; pres="e"; perf ="e"; isPresBlank = False; isPerfBlank = False; @@ -139,6 +163,12 @@ lin always_AdV = {s = "obutóòsha"; agr = AgrNo}; everywhere_Adv = {s = "hóòna"; agr = AgrNo}; -- adverb of place. 
here_Adv = {s = "hanu"; agr = AgrNo}; + here7from_Adv = mkAdv "here7from_Adv:findout" AgrNo; + quite_Adv = mkAdA "quite_Adv:findout" Post; + there7from_Adv = mkAdv "there7from_Adv:findout" AgrNo; + there7to_Adv = mkAdv "there7to_Adv:findout" AgrNo; + here7to_Adv = mkAdv "here7to_Adv:findout" AgrNo; + there_Adv = mkAdv "hari" AgrNo; {-End of Adverbs Adverbs-} {-Begining of Quantifiers-} @@ -171,8 +201,9 @@ lin whoPl_IP = { s= "ha"; n = IPl; isVerbSuffix = True; requiresIPPrefix = False; aux="ni"; endOfSentence = True} ;--: IP ; -- who (plural) whoSg_IP = { s= "ha"; n = ISg; isVerbSuffix = True; requiresIPPrefix = False; aux="ni"; endOfSentence = True}; --: IP ; -- who (singular) --You may need to use booleans to indicate that you need these tables rather than carrying them. + --interrogative adverbs how_IAdv = {s ="ta"; requiresSubjPrefix = True; endOfSentence =True}; --: IAdv ; - --how8much_IAdv = {s ="kwiga"; s2requireSubjPrefix = True};--: IAdv ; + how8much_IAdv = {s ="kwigana"; requiresSubjPrefix = True; endOfSentence =True};--: IAdv ; when_IAdv = {s ="ryari"; requiresSubjPrefix = False; endOfSentence =True}; --: IAdv ; where_IAdv = {s ="nkahe"; requiresSubjPrefix = False; endOfSentence =True}; --: IAdv ; @@ -211,28 +242,51 @@ lin numeralS=\\_=>[]; numeralExists = False };--: Det ; + + much_Det = + { + s =[]; + s2 =\\agr => "ingi";-- mkSubjCliticTablePl ! 
agr ++ "mwe"; + ntype = Complete; + num = Pl; + pos = Post; + doesAgree = True; + numeralS=\\_=>[]; + numeralExists = False + };--: Det ; + - want_VV = {s = "yend"; pres="da"; perf = "zire"; isPresBlank = False; + want_VV = {s = "yend"; pres="da"; perf = "zire"; isPresBlank = False; isPerfBlank = False; isRegular = True; p = []; isRefl = False; morphs=mkVerbMorphs; isRegular=True; inf=[]; whenUsed = VVBoth}; - can8know_VV = {s = "baas"; pres="a"; perf = "ize"; isPresBlank = False; + can8know_VV = {s = "baas"; pres="a"; perf = "ize"; isPresBlank = False; isPerfBlank = False; isRegular = True; p = []; isRefl = False; morphs=mkVerbMorphs; isRegular=True; inf=[]; whenUsed = VVBoth};--: VV ; -- can (capacity) - can_VV = {s = "baas"; pres="a"; perf = "ize"; isPresBlank = False; + can_VV = {s = "baas"; pres="a"; perf = "ize"; isPresBlank = False; isPerfBlank = False; isRegular = True; p = []; isRefl = False; morphs=mkVerbMorphs; isRegular=True; inf=[]; whenUsed = VVBoth};--: VV ; -- can (possibility) -- must_VV used especially in the perfective mood: see dictionary entry shemerera on Pg 501 of Mpairwe -- must has no passive form must_VV = {s = "shemere"; pres="ra"; perf = "ire"; isPresBlank = False; isPerfBlank = False; isRegular = False; p = []; isRefl = False; morphs=mkVerbMorphs; isRegular=False; inf=[]; whenUsed = VVPerf}; --VV - --somebody_NP = {}; --: NP ; - --something_NP : NP ; - --somewhere_Adv : Adv ; + everybody_NP = {s = \\_=>"buri muntu" ; agr=AgP3 Sg MU_BA}; + everything_NP = {s = \\_=>"buri kintu" ; agr=AgP3 Sg KI_BI}; + somebody_NP = {s = \\_=>"somebody:omuntu omwe" ; agr=AgP3 Sg MU_BA}; --: NP ; + something_NP = {s = \\_=>"Something:ekintu kimwe" ; agr=AgP3 Sg KI_BI} ; -- NP ; + nobody_NP = {s = \\_=>"tihiine muntu" ; agr=AgP3 Sg MU_BA}; + nothing_NP = {s = \\_=>"tihiine kintu" ; agr=AgP3 Sg KI_BI}; + + + -- Subjunctives that_Subj = ss "ngu" ; when_Subj = ss "obu"; because_Subj = ss "ahabwokuba"; - + although_Subj = ss "nobu"; + if_Subj = ss "kuri"; 
--Adjective modifying Adverbs almost_AdA = {s="haihi"; position=Pre}; --: AdA ; --quite_Adv ss "kimwe"; --: AdA ; used in the pr + somewhere_Adv = mkAdv "hamwe ahantu" (AgrYes (AgP3 Sg HA)); -- : Adv ; + + so_AdA = {s="munônga"; position=Post};--: AdA ; too_AdA = {s="munônga"; position=Post}; --: AdA ; very_AdA = {s="munônga"; position=Post}; --: AdA ; @@ -242,6 +296,23 @@ lin but_PConj = ss "báìtu"; --: PConj ; -- variants béìtu otherwise_PConj = ss "okûndi"; --: PConj ; therefore_PConj = ss "n'ahabwe'êkyo"; --: PConj ; + + + -- Comparative Adverb + as_CAdv = mkCAdv "nka" ; + less_CAdv = mkCAdv "kye ahari"; + more_CAdv = mkCAdv "ingi ahari"; + + + -- Adverbs modifying numerals + at_most_AdN = mkAdN "ekihango"; + + -- Utterances + no_Utt = ss "apaana"; + yes_Utt = ss "yego"; + lin language_title_Utt = ss "Rukiga" ; + + {- and_Conj : Conj ; both7and_DConj : Conj ; -- both...and