mirror of
https://github.com/GrammaticalFramework/gf-rgl.git
synced 2026-05-27 08:58:55 -06:00
LangHrv compiles now, but with a partial Slovak lexicon
This commit is contained in:
@@ -22,9 +22,9 @@ concrete CatHrv of Cat =
|
||||
|
||||
VP = {verb : VerbForms ; clit,compl : Agr => Str} ; ---- more fields probably needed
|
||||
VPSlash = {verb : VerbForms ; clit,compl : Agr => Str ; c : ComplementCase} ; ----
|
||||
V = ResHrv.VerbForms ;
|
||||
V2 = ResHrv.VerbForms ** {c : ComplementCase} ;
|
||||
VS,VQ = ResHrv.VerbForms ;
|
||||
V = {s : VerbForms} ;
|
||||
V2 = {s : VerbForms ; c : ComplementCase} ;
|
||||
VS,VQ = {s : VerbForms} ;
|
||||
|
||||
A = ResHrv.AdjForms ;
|
||||
AP = ResHrv.Adjective ** {isPost : Bool} ; -- {s : Gender => Number => Case => Str}
|
||||
@@ -32,7 +32,7 @@ concrete CatHrv of Cat =
|
||||
|
||||
AdA = {s : Str} ;
|
||||
|
||||
N = ResHrv.NounForms ;
|
||||
N = ResHrv.NounForms ** {g : Gender} ;
|
||||
CN = ResHrv.Noun ; -- {s : Number => Case => Str ; g : Gender}
|
||||
NP = {s,clit,prep : Case => Str ; a : Agr ; hasClit : Bool} ; -- clit,prep differ for pronouns
|
||||
PN = {s : Case => Str ; g : Gender} ;
|
||||
@@ -40,7 +40,7 @@ concrete CatHrv of Cat =
|
||||
Quant = {s : Gender => Number => Case => Str} ; -- same as AP
|
||||
Num = Determiner ;
|
||||
Card = Determiner ; -- {s : Gender => Case => Str ; size : NumSize} ;
|
||||
Pron = PronForms ** {poss : DemPronForms} ;
|
||||
Pron = PronForms ** {poss : AdjForms} ;
|
||||
|
||||
Adv = {s : Str} ;
|
||||
Prep = ResHrv.ComplementCase ; -- {s : Str ; c : Case ; hasPrep : Bool} ;
|
||||
@@ -64,8 +64,7 @@ concrete CatHrv of Cat =
|
||||
A = \s -> s.msnom ;
|
||||
|
||||
|
||||
lincat Numeral = Determiner ; ---- TODO: should contain Ord as well
|
||||
lincat Digits = {s:Str ; size : NumSize} ;
|
||||
|
||||
lincat Numeral = {s : AdjForms ; size : NumSize} ;
|
||||
lincat Digits = {s : Str ; size : NumSize} ;
|
||||
|
||||
}
|
||||
|
||||
@@ -57,8 +57,8 @@ concrete LexiconHrv of Lexicon =
|
||||
green_A = mkA "zelený" ;
|
||||
yellow_A = mkA "žltý" ;
|
||||
|
||||
buy_V2 = mkV2 (iii_kupovatVerbForms "kupovať") ;
|
||||
love_V2 = mkV2 (iii_kupovatVerbForms "milovať") ;
|
||||
---- buy_V2 = mkV2 (iii_kupovatVerbForms "kupovať") ;
|
||||
---- love_V2 = mkV2 (iii_kupovatVerbForms "milovať") ;
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -24,39 +24,27 @@ lin
|
||||
|
||||
DefArt = {s = \\_,_,_ => []} ;
|
||||
IndefArt = {s = \\_,_,_ => []} ;
|
||||
NumPl = {s = \\_,_ => [] ; size = Num2_4} ; ---- size
|
||||
NumSg = {s = \\_,_ => [] ; size = Num1} ;
|
||||
NumPl = {s = \\_,_ => [] ; size = NS_20_} ; ---- size
|
||||
NumSg = {s = \\_,_ => [] ; size = NS_1} ;
|
||||
|
||||
UsePron pron = {
|
||||
s = table {
|
||||
Nom => pron.nom ;
|
||||
Gen => pron.gen ;
|
||||
Dat => pron.dat ;
|
||||
Acc => pron.acc ;
|
||||
Loc => pron.loc ;
|
||||
s, prep = table { ---- TODO check prep
|
||||
Nom | Voc => pron.nom ;
|
||||
Gen | Acc => pron.gen ;
|
||||
Dat | Loc => pron.dat ;
|
||||
Ins => pron.ins
|
||||
} ;
|
||||
clit = table {
|
||||
Nom => pron.cnom ;
|
||||
Gen => pron.cgen ;
|
||||
Dat => pron.cdat ;
|
||||
Acc => pron.cacc ;
|
||||
Loc => pron.loc ;
|
||||
clit = table { ---- TODO check prep
|
||||
Nom | Voc => pron.nom ;
|
||||
Gen | Acc => pron.cgen ;
|
||||
Dat | Loc => pron.cdat ;
|
||||
Ins => pron.ins
|
||||
} ;
|
||||
prep = table {
|
||||
Nom => pron.nom ;
|
||||
Gen => pron.pgen ;
|
||||
Dat => pron.pdat ;
|
||||
Acc => pron.pacc ;
|
||||
Loc => pron.loc ;
|
||||
Ins => pron.pins
|
||||
} ;
|
||||
a = pron.a ;
|
||||
hasClit = True ;
|
||||
} ;
|
||||
|
||||
PossPron pron = justDemPronFormsAdjective pron.poss ;
|
||||
PossPron pron = adjFormsAdjective pron.poss ;
|
||||
|
||||
UsePN pn = {
|
||||
s,clit,prep = \\c => pn.s ! c ;
|
||||
@@ -86,7 +74,7 @@ lin
|
||||
hasClit = False ;
|
||||
} ;
|
||||
|
||||
UseN n = nounFormsNoun n ;
|
||||
UseN n = nounFormsNoun n n.g ;
|
||||
|
||||
ApposCN cn np = {
|
||||
s = \\n,c => cn.s ! n ! c ++ np.s ! c ; ---- TODO check apposition order
|
||||
@@ -95,7 +83,10 @@ lin
|
||||
|
||||
NumCard c = c ;
|
||||
NumDigits ds = ds ** {s = \\_,_ => ds.s} ;
|
||||
NumNumeral nu = nu ;
|
||||
NumNumeral nu = {
|
||||
s = \\g,c => (adjFormsAdjective nu.s).s ! g ! Sg ! c ; ---- TODO Sg?
|
||||
size = nu.size
|
||||
} ;
|
||||
|
||||
|
||||
}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
concrete NumeralHrv of Numeral =
|
||||
|
||||
---- CatHrv [Numeral, Digits] **
|
||||
CatHrv [Numeral, Digits] **
|
||||
|
||||
open
|
||||
ResHrv,
|
||||
@@ -10,11 +10,6 @@ concrete NumeralHrv of Numeral =
|
||||
-- AR 2022-09-27
|
||||
---- TODO ordinal forms
|
||||
|
||||
lincat Numeral = LinNumeral ; ---- TODO move to Cat
|
||||
lincat Digits = {s : Str ; size : NumSize} ;
|
||||
|
||||
param NumSize = NS_1 | NS_2_4 | NS_5_20 | NS_20_ ;
|
||||
|
||||
oper LinNumeral = {s : AdjForms ; size : NumSize} ;
|
||||
oper LinDigit = {unit : AdjForms ; teen, ten, hundred : Str ; size : NumSize} ;
|
||||
|
||||
|
||||
@@ -26,6 +26,8 @@ oper
|
||||
= Dat ;
|
||||
accusative : Case
|
||||
= Acc ;
|
||||
vocative : Case
|
||||
= Voc ;
|
||||
locative : Case
|
||||
= Loc ;
|
||||
instrumental : Case
|
||||
@@ -39,8 +41,8 @@ oper
|
||||
mkN = overload {
|
||||
mkN : (nom : Str) -> N
|
||||
= \nom -> lin N (guessNounForms nom) ;
|
||||
mkN : (nom,gen : Str) -> Gender -> N
|
||||
= \nom,gen,g -> lin N (declensionNounForms nom gen g) ;
|
||||
mkN : (nom,gen : Str) -> Gender -> N ---- TODO
|
||||
= \nom,gen,g -> lin N (guessNounForms nom) ;
|
||||
} ;
|
||||
|
||||
-- The following standard declensions can be used with good accuracy.
|
||||
@@ -49,34 +51,7 @@ oper
|
||||
-- The default extensions are shown in comments; if the default is correct, no extension is needed.
|
||||
-- Notice that some paradigms take two arguments, some take one.
|
||||
|
||||
chlapN : Str -> N
|
||||
= \s -> lin N (R.chlapN s) ;
|
||||
hrdinaN : Str -> N
|
||||
= \s -> lin N (R.hrdinaN s) ;
|
||||
dubN : Str -> N
|
||||
= \s -> lin N (R.dubN s) ;
|
||||
strojN : Str -> N
|
||||
= \s -> lin N (R.strojN s) ;
|
||||
ponyN : Str -> N
|
||||
= \s -> lin N (R.ponyN s) ;
|
||||
zenaN : (snom, pgen : Str) -> N
|
||||
= \s,p -> lin N (R.zenaN s) ** {pgen = p} ;
|
||||
ulicaN : (snom, pgen : Str) -> N
|
||||
= \s,p -> lin N (R.ulicaN s) ** {pgen = p} ;
|
||||
dlanN : (snom, pgen : Str) -> N
|
||||
= \s,p -> lin N (R.dlanN s p) ;
|
||||
kostN : (snom, pgen : Str) -> N
|
||||
= \s,p -> lin N (R.kostN s p) ;
|
||||
mestoN : (snom, pgen : Str) -> N
|
||||
= \s,p -> lin N (R.mestoN s) ** {pgen = p} ;
|
||||
srdceN : (snom, pgen : Str) -> N
|
||||
= \s,p -> lin N (R.srdceN s) ** {pgen = p} ;
|
||||
vysvedcenieN : Str -> N
|
||||
= \s -> lin N (R.vysvedcenieN s) ;
|
||||
dievcaN : Str -> N
|
||||
= \s -> lin N (R.dievcaN s) ;
|
||||
dievceniecN : Str -> N
|
||||
= \s -> lin N (R.dievceniecN s) ;
|
||||
---- TODO
|
||||
|
||||
-- The full definition of the noun record is
|
||||
-- {
|
||||
@@ -92,27 +67,9 @@ oper
|
||||
|
||||
mkA = overload {
|
||||
mkA : Str -> A
|
||||
= \s -> lin A (guessAdjForms s)
|
||||
= \s -> lin A (velikA s)
|
||||
} ;
|
||||
|
||||
peknyA : Str -> A
|
||||
= \s -> lin A (R.peknyA s) ;
|
||||
krasnyA : Str -> A
|
||||
= \s -> lin A (R.krasnyA s) ;
|
||||
cudziA : Str -> A
|
||||
= \s -> lin A (R.cudziA s) ;
|
||||
rydziA : Str -> A
|
||||
= \s -> lin A (R.rydziA s) ;
|
||||
otcovA : Str -> A
|
||||
= \s -> lin A (R.otcovA s) ;
|
||||
paviA : Str -> A
|
||||
= \s -> lin A (R.paviA s) ;
|
||||
|
||||
invarA : Str -> A
|
||||
= \s -> lin A (invarAdjForms s) ;
|
||||
|
||||
mkA2 : A -> Prep -> A2
|
||||
= \a,p -> lin A2 (a ** {c = p}) ;
|
||||
|
||||
-- the full definition of the adjective record is
|
||||
-- {
|
||||
@@ -125,12 +82,12 @@ oper
|
||||
-- Verbs
|
||||
|
||||
mkV2 = overload {
|
||||
mkV2 : VerbForms -> VerbForms ** {c : ComplementCase}
|
||||
= \vf -> vf ** {c = {s = [] ; c = Acc ; hasPrep = False}} ;
|
||||
mkV2 : VerbForms -> Case -> VerbForms ** {c : ComplementCase}
|
||||
= \vf,c -> vf ** {c = {s = [] ; c = c ; hasPrep = False}} ;
|
||||
mkV2 : VerbForms -> ComplementCase -> VerbForms ** {c : ComplementCase}
|
||||
= \vf,c -> vf ** {c = c} ;
|
||||
mkV2 : VerbForms -> V2
|
||||
= \vf -> lin V2 {s = vf ; c = {s = [] ; c = Acc ; hasPrep = False}} ;
|
||||
mkV2 : VerbForms -> Case -> V2
|
||||
= \vf,c -> lin V2 {s = vf ; c = {s = [] ; c = c ; hasPrep = False}} ;
|
||||
mkV2 : VerbForms -> ComplementCase -> V2
|
||||
= \vf,c -> lin V2 {s = vf ; c = c} ;
|
||||
} ;
|
||||
|
||||
------------------------
|
||||
|
||||
@@ -143,29 +143,16 @@ voicing : Str -> Str = \s -> case s of {
|
||||
|
||||
_ => dubN (""+snom) ** {pgen = pgen} ---- Predef.error ("cannot infer declension type for" ++ snom ++ pgen)
|
||||
} ** {pgen = pgen ; g = g} ;
|
||||
|
||||
-}
|
||||
-- the "smartest" one-argument mkN
|
||||
|
||||
guessNounForms : Str -> NounForms
|
||||
guessNounForms : Str -> NounForms ** {g : Gender}
|
||||
= \snom -> case snom of {
|
||||
_ + ("i"|"y"|"e") => ponyN snom ;
|
||||
_ + #softConsonant => strojN snom ;
|
||||
_ + #hardConsonant => dubN snom ;
|
||||
_ + #neutralConsonant => dubN snom ;
|
||||
_ + #hardConsonant + "a" => zenaN snom ;
|
||||
_ + #neutralConsonant + "a" => zenaN snom ;
|
||||
_ + #softConsonant + "a" => ulicaN snom ;
|
||||
_ + ("ia"|"ya") => ulicaN snom ;
|
||||
_ + "o" => mestoN snom ;
|
||||
_ + "ie" => vysvedcenieN snom ;
|
||||
_ + "e" => srdceN snom ;
|
||||
_ + "ä" => dievcaN snom ;
|
||||
|
||||
_ => dubN (""+snom) ---- Predef.error ("cannot guess declension type for" ++ snom)
|
||||
---- TODO
|
||||
_ => izvorN snom ** {g = inanimate}
|
||||
} ;
|
||||
|
||||
-}
|
||||
|
||||
-- the traditional declensions, following Wiki
|
||||
-- they are also exported in ParadigmsHrv with names izvorN etc
|
||||
|
||||
@@ -666,12 +653,17 @@ oper
|
||||
_ => adjAdj.s ! g ! n ! c
|
||||
}
|
||||
} ;
|
||||
-}
|
||||
|
||||
param NumSize = NS_1 | NS_2_4 | NS_5_20 | NS_20_ ;
|
||||
|
||||
oper
|
||||
Determiner : Type = {
|
||||
s : Gender => Case => Str ;
|
||||
size : NumSize
|
||||
} ;
|
||||
|
||||
{-
|
||||
mkDemPronForms : Str -> DemPronForms = \jedn -> {
|
||||
msnom = jedn + "y" ; -- should be "jeden"
|
||||
fsnom = jedn + "a" ;
|
||||
@@ -810,19 +802,17 @@ oper
|
||||
regNumeral sto sto sto sto ;
|
||||
|
||||
invarNumeral : Str -> Determiner = \s -> invarDeterminer s Num5 ;
|
||||
-}
|
||||
|
||||
--------------------------------
|
||||
-- combining nouns with numerals
|
||||
|
||||
param
|
||||
NumSize = Num1 | Num2_4 | Num5 ; -- CEG 6.1
|
||||
|
||||
oper
|
||||
numSizeForm : (Number => Case => Str) -> NumSize -> Case -> Str
|
||||
= \cns,n,c -> case n of {
|
||||
Num1 => cns ! Sg ! c ;
|
||||
Num2_4 => cns ! Pl ! c ;
|
||||
Num5 => case c of {
|
||||
NS_1 => cns ! Sg ! c ;
|
||||
NS_2_4 => cns ! Pl ! c ;
|
||||
_ => case c of {
|
||||
Nom | Acc => cns ! Pl ! Gen ;
|
||||
_ => cns ! Pl ! c
|
||||
}
|
||||
@@ -830,14 +820,14 @@ oper
|
||||
|
||||
numSizeAgr : Gender -> NumSize -> Person -> Agr
|
||||
= \g,ns,p -> case ns of {
|
||||
Num5 => Ag Neutr Sg p ; -- essential grammar 6.1.4
|
||||
Num2_4 => Ag g Pl p ;
|
||||
Num1 => Ag g Sg p
|
||||
NS_1 => Ag g Sg p ;
|
||||
NS_2_4 => Ag g Pl p ;
|
||||
_ => Ag Neutr Sg p ---- TODO verify
|
||||
} ;
|
||||
|
||||
numSizeNumber : NumSize -> Number = \ns -> case ns of {
|
||||
Num1 => Sg ;
|
||||
NS_1 => Sg ;
|
||||
_ => Pl ---- TO CHECK
|
||||
} ;
|
||||
-}
|
||||
|
||||
}
|
||||
|
||||
@@ -14,19 +14,19 @@ lin
|
||||
} ;
|
||||
|
||||
UseCl temp pol cl = {
|
||||
s = temp.s ++ cl.subj ++ cl.clit ++ pol.s ++ verbAgr cl.verb cl.a pol.p ++ cl.compl ;
|
||||
} ;
|
||||
s = temp.s ++ cl.subj ++ cl.clit ++ pol.s ++ verbAgr cl.verb cl.a CTPres ++ cl.compl ;
|
||||
} ; ---- TODO tense, negation
|
||||
|
||||
--- TODO is inversion the standard? ; add indirect questions
|
||||
UseQCl temp pol cl = {
|
||||
s = temp.s ++ cl.clit ++ pol.s ++ verbAgr cl.verb cl.a pol.p ++ cl.subj ++ cl.compl ;
|
||||
} ;
|
||||
s = temp.s ++ cl.clit ++ pol.s ++ verbAgr cl.verb cl.a CTPres ++ cl.subj ++ cl.compl ;
|
||||
} ; ---- TODO tenses
|
||||
|
||||
UseRCl temp pol rcl = {
|
||||
s = \\a => temp.s ++
|
||||
rcl.subj ! a ++ rcl.clit ! a ++
|
||||
pol.s ++ verbAgr rcl.verb a pol.p ++
|
||||
pol.s ++ verbAgr rcl.verb a CTPres ++
|
||||
rcl.compl ! a ;
|
||||
} ;
|
||||
} ; ---- TODO tenses
|
||||
|
||||
}
|
||||
|
||||
@@ -3,22 +3,22 @@ concrete StructuralHrv of Structural = CatHrv **
|
||||
|
||||
lin
|
||||
and_Conj = mkConj "a" ;
|
||||
by8agent_Prep = mkPrep "" Ins ;
|
||||
few_Det = invarNumeral "málo" ; -- see notes
|
||||
---- by8agent_Prep = mkPrep "" Ins ;
|
||||
---- few_Det = invarNumeral "málo" ; -- see notes
|
||||
for_Prep = mkPrep "pre" accusative ;
|
||||
from_Prep = mkPrep (pre {"z" => "zo" ; _ => "z"}) Gen ; ---- consonant clusters and syllable with the onset with the same place of articulation
|
||||
have_V2 = mkV2 haveVerbForms ;
|
||||
in_Prep = mkPrep (pre {"v" => "vo" ; _ => "v"}) Loc ; ----
|
||||
many_Det = regNumeral "mnoho" "mnohých" "mnohým" "mnohými" ; ---- alternative: invarNumeral "veľa" ;
|
||||
from_Prep = mkPrep "iz" Gen ;
|
||||
have_V2 = mkV2 imati_VerbForms ;
|
||||
in_Prep = mkPrep "u" Loc ;
|
||||
---- many_Det = regNumeral "mnoho" "mnohých" "mnohým" "mnohými" ; ---- alternative: invarNumeral "veľa" ;
|
||||
or_Conj = mkConj "alebo" ;
|
||||
somePl_Det = invarDeterminer "niekoľko" Num5 ;
|
||||
--- somePl_Det = {s = \\g,c => (demPronFormsAdjective (mkDemPronForms "niekoľko") "").s ! g ! Pl ! c ; size = Num5} ;
|
||||
something_NP = {s,clit,prep = \\c => "nie" + coForms ! c ; a = Ag Neutr Sg P3 ; hasClit = False} ; -- CEG 5.6.3
|
||||
---- somePl_Det = invarDeterminer "niekoľko" Num5 ;
|
||||
---- somePl_Det = {s = \\g,c => (demPronFormsAdjective (mkDemPronForms "niekoľko") "").s ! g ! Pl ! c ; size = Num5} ;
|
||||
---- something_NP = {s,clit,prep = \\c => "nie" + coForms ! c ; a = Ag Neutr Sg P3 ; hasClit = False} ; -- CEG 5.6.3
|
||||
possess_Prep = mkPrep "" Gen ;
|
||||
that_Quant = demPronFormsAdjective (tenDemPronForms "") "" ;
|
||||
this_Quant = demPronFormsAdjective (tenDemPronForms "" ** {msgen = "toh"}) "to" ;
|
||||
to_Prep = mkPrep "do" Gen ;
|
||||
with_Prep = mkPrep (pre {"s" => "so" ; _ => "s"}) Ins ;
|
||||
---- that_Quant = demPronFormsAdjective (tenDemPronForms "") "" ;
|
||||
---- this_Quant = demPronFormsAdjective (tenDemPronForms "" ** {msgen = "toh"}) "to" ;
|
||||
to_Prep = mkPrep "u" Acc ;
|
||||
with_Prep = mkPrep (pre {"s"|"z"|"š"|"ž"|"mnom" => "sa" ; _ => "s"}) Ins ;
|
||||
|
||||
i_Pron = mkPron (Ag (Masc Anim) Sg P1) ; --- to add Fem pronouns in Extend
|
||||
youSg_Pron = mkPron (Ag (Masc Anim) Sg P2) ;
|
||||
|
||||
@@ -2,7 +2,7 @@ concrete VerbHrv of Verb = CatHrv ** open ResHrv, Prelude in {
|
||||
|
||||
lin
|
||||
UseV v = {
|
||||
verb = v ;
|
||||
verb = v.s ;
|
||||
clit,compl = \\_ => []
|
||||
} ;
|
||||
|
||||
@@ -16,13 +16,13 @@ lin
|
||||
} ;
|
||||
|
||||
SlashV2a v = {
|
||||
verb = v ;
|
||||
verb = v.s ;
|
||||
clit,compl = \\_ => [] ;
|
||||
c = v.c
|
||||
} ;
|
||||
|
||||
UseComp comp = {
|
||||
verb = copulaVerbForms ;
|
||||
verb = biti_VerbForms ; ---- TODO: jesam
|
||||
clit = \\_ => [] ;
|
||||
compl = comp.s
|
||||
} ;
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
import json
|
||||
|
||||
# https://kaikki.org/dictionary/rawdata.html
|
||||
# Tatu Ylonen: Wiktextract: Wiktionary as Machine-Readable Structured Data,
|
||||
# Proceedings of the 13th Conference on Language Resources and Evaluation (LREC),
|
||||
# pp. 1317-1325, Marseille, 20-25 June 2022.
|
||||
|
||||
FILE = 'data/raw-wiktextract-data.json'
|
||||
|
||||
|
||||
Reference in New Issue
Block a user