From d2ad4d6105c534025e12af2ad3d7b1e6ab2de489 Mon Sep 17 00:00:00 2001 From: Michal Mechura Date: Thu, 16 Apr 2020 20:01:38 +0200 Subject: [PATCH] improvements to czech, mostly morphological --- src/czech/ResCze.gf | 93 +++++++++++++++++++------------------- src/czech/StructuralCze.gf | 4 +- 2 files changed, 49 insertions(+), 48 deletions(-) diff --git a/src/czech/ResCze.gf b/src/czech/ResCze.gf index 7f791252e..f683a2ab1 100644 --- a/src/czech/ResCze.gf +++ b/src/czech/ResCze.gf @@ -16,7 +16,7 @@ param Case = Nom | Gen | Dat | Acc | Voc | Loc | Ins ; -- traditional order Person = P1 | P2 | P3 ; - + Agr = Ag Gender Number Person ; CTense = CTPres | CTPast ; ----- TODO complete the tense system to match Czech verb morphology @@ -27,16 +27,16 @@ oper hardConsonant : pattern Str = #("d"|"t"|"g"|"h"|"k"|"n"|"r") ; softConsonant : pattern Str = #("ť"|"ď"|"j"|"ň"|"ř"|"š"|"c"|"č"|"ž") ; neutralConsonant : pattern Str = #("b"|"f"|"l"|"m"|"p"|"s"|"v") ; - + consonant : pattern Str = #( - "d" | "t" | "g" | "h" | "k" | "n" | "r" | - "ť" | "ď" | "j" | "ň" | "ř" | "š" | "c" | "č" | "ž" | + "d" | "t" | "g" | "h" | "k" | "n" | "r" | + "ť" | "ď" | "j" | "ň" | "ř" | "š" | "c" | "č" | "ž" | "b" | "f" | "l" | "m" | "p" | "s" | "v" - ) ; + ) ; dropFleetingE : Str -> Str = \s -> case s of { - x + "e" + c@("k"|"c") => x + c ; + x + "e" + c@("k"|"c"|"n") => x + c ; x + "e" + "ň" => x + "n" ; _ => s } ; @@ -85,10 +85,10 @@ oper shortFemPlGen : Str -> Str = \s -> case s of { ul + "ice" => ul + "ic" ; koleg + "yně" => koleg + "yň" ; - ruz + "e" => ruz + "i" ; + ruz + "e" => ruz + "í" ; _ => Predef.error ("shortFemPlGen does not apply to" ++ s) - } ; - + } ; + --------------- -- Nouns --------------- @@ -136,7 +136,7 @@ oper -- terminology of CEG DeclensionType : Type = Str -> NounForms ; - + declensionNounForms : (nom,gen : Str) -> Gender -> NounForms = \nom,gen,g -> let decl : DeclensionType = case of { @@ -173,12 +173,12 @@ oper _ + "e" => declMORE s ; _ + "í" => declSTAVENI s ; _ => Predef.error ("cannot guess declension type for" ++ s) - } ; + } ; -- the traditional declensions, in both CEG and Wiki -- they are also exported in ParadigmsCze with names panN etc - declPAN : DeclensionType = \pan -> --- plural nom ové|i|é can be changed with ** {pnom = ...} CEG 3.5.1 + declPAN : DeclensionType = \pan -> --- plural nom ové|i|é can be changed with ** {pnom = ...} CEG 3.5.1 { snom = pan ; sgen,sacc = pan + "a" ; @@ -193,7 +193,7 @@ oper ploc = addEch pan ; g = Masc Anim } ; - + declPREDSEDA : DeclensionType = \predseda -> --- 3.5.4: sgen y/i let predsed = init predseda in @@ -284,7 +284,7 @@ oper sins = muz + "em" ; pnom = case muz_ of { - uci + "tel" => uci + "tele" ; + uci + "tel" => uci + "telé" ; _ => muz + "i" --- muzové } ; pgen = muz + "ů" ; @@ -308,7 +308,7 @@ oper pdat = soudc + "ům" ; pacc = soudce ; ploc = soudc + "ích" ; - pins = soudc + "i" ; + pins = soudc + "i" ; g = Masc Anim } ; @@ -332,7 +332,8 @@ oper in { snom,sgen,svoc = ruze ; --- pnom,pacc - sdat,sacc,sloc,sins = ruz + "i" ; + sdat,sacc,sloc = ruz + "i" ; + sins = ruz + "í" ; pnom,pacc = ruze ; pgen = shortFemPlGen ruze ; @@ -343,7 +344,7 @@ oper } ; declPISEN : DeclensionType = \pisen -> - let pisn = dropFleetingE pisen + let pisn = dropFleetingE pisen in { snom,sacc = pisen ; @@ -389,7 +390,7 @@ oper pins = kur + "aty" ; g = Neutr } ; - + declMORE : DeclensionType = \more -> --- 3.7.2 pgen zero sometimes let mor = init more in @@ -399,13 +400,13 @@ oper sins = mor + "em" ; pnom,pacc = more ; - pgen = mor + "í" ; --- + pgen = mor + "í" ; --- pdat = mor + "ím" ; - ploc = mor + "ích" ; + ploc = mor + "ích" ; pins = mor + "i" ; g = Neutr } ; - + declSTAVENI : DeclensionType = \staveni -> { snom,sgen,sdat,sacc,svoc,sloc = staveni ; @@ -422,7 +423,7 @@ oper -- Adjectives -- to be used for AP: 56 forms for each degree - Adjective : Type = {s : Gender => Number => Case => Str} ; + Adjective : Type = {s : Gender => Number => Case => Str} ; -- to be used for A, in three degrees: 15 forms in each ---- TODO other degrees than positive @@ -444,25 +445,25 @@ oper adjFormsAdjective : AdjForms -> Adjective = \afs -> { s = \\g,n,c => case of { - + | => afs.msnom ; | => afs.fsnom ; => afs.nsnom ; - + | => afs.msgen ; | => afs.fsgen ; - + => afs.msdat ; => afs.fsdat ; => afs.fsacc ; - + => afs.msloc ; - + | => afs.msins ; => afs.fsins ; @@ -473,7 +474,7 @@ adjFormsAdjective : AdjForms -> Adjective = \afs -> { => afs.pgen ; => afs.pins } - + } ; -- hard declension @@ -571,7 +572,7 @@ adjFormsAdjective : AdjForms -> Adjective = \afs -> { pastpartpl = "byli" ; negpressg3 = "ní" ; -- ne is added to this } ; - + haveVerbForms : VerbForms = { inf = "mít" ; pressg1 = "mám" ; @@ -590,7 +591,7 @@ adjFormsAdjective : AdjForms -> Adjective = \afs -> { iii_kupovatVerbForms : Str -> VerbForms = \kupovat -> let kupo = Predef.tk 3 kupovat ; - kupu = Predef.tk 1 kupo + "u" + kupu = Predef.tk 1 kupo + "u" in { inf = kupovat ; @@ -604,7 +605,7 @@ adjFormsAdjective : AdjForms -> Adjective = \afs -> { pastpartpl = kupo + "vali" ; } ; - + --------------------------- -- Pronouns @@ -637,7 +638,7 @@ adjFormsAdjective : AdjForms -> Adjective = \afs -> { cgen,cacc = "tě" ; dat,pdat,loc = "tobě" ; cdat = "ti" ; - ins,pins = "tebou" + ins,pins = "tebou" } ; Ag (Masc _) Sg P3 => { nom = "on" ; @@ -714,10 +715,10 @@ adjFormsAdjective : AdjForms -> Adjective = \afs -> { oper DemPronForms : Type = { msnom, fsnom, nsnom, - msgen, fsgen, + msgen, fsgen, msdat, -- fsdat = fsgen unlike AdjForms fsacc, - msloc, + msloc, msins, fsins, mpnom, fpnom, -- mpacc = fpacc = fpnom pgen, @@ -748,7 +749,7 @@ oper fsnom = t + "a" ; nsnom = t + "o" ; msgen = t + "oho" ; - fsgen = t + "é" ; + fsgen = t + "é" ; msdat = t + "omu" ; fsacc = t + "u" ; msloc = t + "om" ; @@ -760,9 +761,9 @@ oper pdat = t + "ěm" ; pins = t + "ěmi" ; } ; - + invarDemPronForms : Str -> DemPronForms = \s -> { - msnom, fsnom, nsnom, msgen, fsgen, + msnom, fsnom, nsnom, msgen, fsgen, msdat, fsacc, msloc, msins, fsins, mpnom, fpnom, pgen, pdat, pins = s ; } ; @@ -776,7 +777,7 @@ oper Loc => "kom" ; Ins => "kým" } ; - + coForms : Case => Str = table { Nom|Acc|Voc => "co" ; Gen => "čeho" ; @@ -790,10 +791,10 @@ oper -- singular forms of demonstratives NumeralForms : Type = { msnom, fsnom, nsnom, - msgen, fsgen, - msdat, + msgen, fsgen, + msdat, fsacc, - msloc, + msloc, msins, fsins : Str } ; @@ -801,7 +802,7 @@ oper \nume,size -> let dem = nume ** - {mpnom, fpnom, pgen, pdat, pins = nume.msnom} ; --- plural forms not used + {mpnom, fpnom, pgen, pdat, pins = nume.msnom} ; --- plural forms not used demAdj = dem ** {fsdat = dem.fsgen} ; adjAdj = adjFormsAdjective demAdj in { @@ -810,7 +811,7 @@ oper } ; -- example: number 1 - oneNumeral : Determiner = numeralFormsDeterminer (mkDemPronForms "jed") Num1 ; + oneNumeral : Determiner = numeralFormsDeterminer ((mkDemPronForms "jedn") ** {msnom = "jeden"}) Num1 ; -- numbers 2,3,4 ---- to check if everything comes out right with the determiner type twoNumeral : Determiner = @@ -820,7 +821,7 @@ oper msdat, msins, fsins = "dvěma" } in numeralFormsDeterminer forms Num2_4 ; - + threeNumeral : Determiner = let forms = { msnom, fsnom, nsnom, fsacc, msgen, fsgen = "tři" ; @@ -850,7 +851,7 @@ oper invarDeterminer : Str -> NumSize -> Determiner = \sto,size -> regNumeral sto sto ; - + invarNumeral : Str -> Determiner = \s -> invarDeterminer s Num5 ; -------------------------------- @@ -881,4 +882,4 @@ oper Num1 => Sg ; _ => Pl ---- TO CHECK } ; -} \ No newline at end of file +} diff --git a/src/czech/StructuralCze.gf b/src/czech/StructuralCze.gf index 6cbf9280a..347e7480f 100644 --- a/src/czech/StructuralCze.gf +++ b/src/czech/StructuralCze.gf @@ -1,8 +1,8 @@ -concrete StructuralCze of Structural = CatCze ** +concrete StructuralCze of Structural = CatCze ** open ParadigmsCze, ResCze, Prelude in { lin - and_Conj = mkConj "i" ; + and_Conj = mkConj "a" ; by8agent_Prep = mkPrep "od" Gen ; ---- TODO this means "from", there might be no good translation few_Det = invarNumeral "málo" ; -- CEG 6.8 --- TODO genitive mála for_Prep = mkPrep "pro" accusative ;