LangHrv compiles now, but with a partial Slovak lexicon

This commit is contained in:
Aarne Ranta
2022-09-28 11:29:03 +02:00
parent 7c2c519e50
commit 13fac41ce6
10 changed files with 81 additions and 146 deletions

View File

@@ -22,9 +22,9 @@ concrete CatHrv of Cat =
VP = {verb : VerbForms ; clit,compl : Agr => Str} ; ---- more fields probably needed VP = {verb : VerbForms ; clit,compl : Agr => Str} ; ---- more fields probably needed
VPSlash = {verb : VerbForms ; clit,compl : Agr => Str ; c : ComplementCase} ; ---- VPSlash = {verb : VerbForms ; clit,compl : Agr => Str ; c : ComplementCase} ; ----
V = ResHrv.VerbForms ; V = {s : VerbForms} ;
V2 = ResHrv.VerbForms ** {c : ComplementCase} ; V2 = {s : VerbForms ; c : ComplementCase} ;
VS,VQ = ResHrv.VerbForms ; VS,VQ = {s : VerbForms} ;
A = ResHrv.AdjForms ; A = ResHrv.AdjForms ;
AP = ResHrv.Adjective ** {isPost : Bool} ; -- {s : Gender => Number => Case => Str} AP = ResHrv.Adjective ** {isPost : Bool} ; -- {s : Gender => Number => Case => Str}
@@ -32,7 +32,7 @@ concrete CatHrv of Cat =
AdA = {s : Str} ; AdA = {s : Str} ;
N = ResHrv.NounForms ; N = ResHrv.NounForms ** {g : Gender} ;
CN = ResHrv.Noun ; -- {s : Number => Case => Str ; g : Gender} CN = ResHrv.Noun ; -- {s : Number => Case => Str ; g : Gender}
NP = {s,clit,prep : Case => Str ; a : Agr ; hasClit : Bool} ; -- clit,prep differ for pronouns NP = {s,clit,prep : Case => Str ; a : Agr ; hasClit : Bool} ; -- clit,prep differ for pronouns
PN = {s : Case => Str ; g : Gender} ; PN = {s : Case => Str ; g : Gender} ;
@@ -40,7 +40,7 @@ concrete CatHrv of Cat =
Quant = {s : Gender => Number => Case => Str} ; -- same as AP Quant = {s : Gender => Number => Case => Str} ; -- same as AP
Num = Determiner ; Num = Determiner ;
Card = Determiner ; -- {s : Gender => Case => Str ; size : NumSize} ; Card = Determiner ; -- {s : Gender => Case => Str ; size : NumSize} ;
Pron = PronForms ** {poss : DemPronForms} ; Pron = PronForms ** {poss : AdjForms} ;
Adv = {s : Str} ; Adv = {s : Str} ;
Prep = ResHrv.ComplementCase ; -- {s : Str ; c : Case ; hasPrep : Bool} ; Prep = ResHrv.ComplementCase ; -- {s : Str ; c : Case ; hasPrep : Bool} ;
@@ -64,8 +64,7 @@ concrete CatHrv of Cat =
A = \s -> s.msnom ; A = \s -> s.msnom ;
lincat Numeral = Determiner ; ---- TODO: should contain Ord as well lincat Numeral = {s : AdjForms ; size : NumSize} ;
lincat Digits = {s:Str ; size : NumSize} ; lincat Digits = {s : Str ; size : NumSize} ;
} }

View File

@@ -57,8 +57,8 @@ concrete LexiconHrv of Lexicon =
green_A = mkA "zelený" ; green_A = mkA "zelený" ;
yellow_A = mkA "žltý" ; yellow_A = mkA "žltý" ;
buy_V2 = mkV2 (iii_kupovatVerbForms "kupovať") ; ---- buy_V2 = mkV2 (iii_kupovatVerbForms "kupovať") ;
love_V2 = mkV2 (iii_kupovatVerbForms "milovať") ; ---- love_V2 = mkV2 (iii_kupovatVerbForms "milovať") ;
} }

View File

@@ -24,39 +24,27 @@ lin
DefArt = {s = \\_,_,_ => []} ; DefArt = {s = \\_,_,_ => []} ;
IndefArt = {s = \\_,_,_ => []} ; IndefArt = {s = \\_,_,_ => []} ;
NumPl = {s = \\_,_ => [] ; size = Num2_4} ; ---- size NumPl = {s = \\_,_ => [] ; size = NS_20_} ; ---- size
NumSg = {s = \\_,_ => [] ; size = Num1} ; NumSg = {s = \\_,_ => [] ; size = NS_1} ;
UsePron pron = { UsePron pron = {
s = table { s, prep = table { ---- TODO check prep
Nom => pron.nom ; Nom | Voc => pron.nom ;
Gen => pron.gen ; Gen | Acc => pron.gen ;
Dat => pron.dat ; Dat | Loc => pron.dat ;
Acc => pron.acc ;
Loc => pron.loc ;
Ins => pron.ins Ins => pron.ins
} ; } ;
clit = table { clit = table { ---- TODO check prep
Nom => pron.cnom ; Nom | Voc => pron.nom ;
Gen => pron.cgen ; Gen | Acc => pron.cgen ;
Dat => pron.cdat ; Dat | Loc => pron.cdat ;
Acc => pron.cacc ;
Loc => pron.loc ;
Ins => pron.ins Ins => pron.ins
} ; } ;
prep = table {
Nom => pron.nom ;
Gen => pron.pgen ;
Dat => pron.pdat ;
Acc => pron.pacc ;
Loc => pron.loc ;
Ins => pron.pins
} ;
a = pron.a ; a = pron.a ;
hasClit = True ; hasClit = True ;
} ; } ;
PossPron pron = justDemPronFormsAdjective pron.poss ; PossPron pron = adjFormsAdjective pron.poss ;
UsePN pn = { UsePN pn = {
s,clit,prep = \\c => pn.s ! c ; s,clit,prep = \\c => pn.s ! c ;
@@ -86,7 +74,7 @@ lin
hasClit = False ; hasClit = False ;
} ; } ;
UseN n = nounFormsNoun n ; UseN n = nounFormsNoun n n.g ;
ApposCN cn np = { ApposCN cn np = {
s = \\n,c => cn.s ! n ! c ++ np.s ! c ; ---- TODO check apposition order s = \\n,c => cn.s ! n ! c ++ np.s ! c ; ---- TODO check apposition order
@@ -95,7 +83,10 @@ lin
NumCard c = c ; NumCard c = c ;
NumDigits ds = ds ** {s = \\_,_ => ds.s} ; NumDigits ds = ds ** {s = \\_,_ => ds.s} ;
NumNumeral nu = nu ; NumNumeral nu = {
s = \\g,c => (adjFormsAdjective nu.s).s ! g ! Sg ! c ; ---- TODO Sg?
size = nu.size
} ;
} }

View File

@@ -1,6 +1,6 @@
concrete NumeralHrv of Numeral = concrete NumeralHrv of Numeral =
---- CatHrv [Numeral, Digits] ** CatHrv [Numeral, Digits] **
open open
ResHrv, ResHrv,
@@ -10,11 +10,6 @@ concrete NumeralHrv of Numeral =
-- AR 2022-09-27 -- AR 2022-09-27
---- TODO ordinal forms ---- TODO ordinal forms
lincat Numeral = LinNumeral ; ---- TODO move to Cat
lincat Digits = {s : Str ; size : NumSize} ;
param NumSize = NS_1 | NS_2_4 | NS_5_20 | NS_20_ ;
oper LinNumeral = {s : AdjForms ; size : NumSize} ; oper LinNumeral = {s : AdjForms ; size : NumSize} ;
oper LinDigit = {unit : AdjForms ; teen, ten, hundred : Str ; size : NumSize} ; oper LinDigit = {unit : AdjForms ; teen, ten, hundred : Str ; size : NumSize} ;

View File

@@ -26,6 +26,8 @@ oper
= Dat ; = Dat ;
accusative : Case accusative : Case
= Acc ; = Acc ;
vocative : Case
= Voc ;
locative : Case locative : Case
= Loc ; = Loc ;
instrumental : Case instrumental : Case
@@ -39,8 +41,8 @@ oper
mkN = overload { mkN = overload {
mkN : (nom : Str) -> N mkN : (nom : Str) -> N
= \nom -> lin N (guessNounForms nom) ; = \nom -> lin N (guessNounForms nom) ;
mkN : (nom,gen : Str) -> Gender -> N mkN : (nom,gen : Str) -> Gender -> N ---- TODO
= \nom,gen,g -> lin N (declensionNounForms nom gen g) ; = \nom,gen,g -> lin N (guessNounForms nom) ;
} ; } ;
-- The following standard declensions can be used with good accuracy. -- The following standard declensions can be used with good accuracy.
@@ -49,34 +51,7 @@ oper
-- The default extensions are shown in comments; if the default is correct, no extension is needed. -- The default extensions are shown in comments; if the default is correct, no extension is needed.
-- Notice that some paradigms take two arguments, some take one. -- Notice that some paradigms take two arguments, some take one.
chlapN : Str -> N ---- TODO
= \s -> lin N (R.chlapN s) ;
hrdinaN : Str -> N
= \s -> lin N (R.hrdinaN s) ;
dubN : Str -> N
= \s -> lin N (R.dubN s) ;
strojN : Str -> N
= \s -> lin N (R.strojN s) ;
ponyN : Str -> N
= \s -> lin N (R.ponyN s) ;
zenaN : (snom, pgen : Str) -> N
= \s,p -> lin N (R.zenaN s) ** {pgen = p} ;
ulicaN : (snom, pgen : Str) -> N
= \s,p -> lin N (R.ulicaN s) ** {pgen = p} ;
dlanN : (snom, pgen : Str) -> N
= \s,p -> lin N (R.dlanN s p) ;
kostN : (snom, pgen : Str) -> N
= \s,p -> lin N (R.kostN s p) ;
mestoN : (snom, pgen : Str) -> N
= \s,p -> lin N (R.mestoN s) ** {pgen = p} ;
srdceN : (snom, pgen : Str) -> N
= \s,p -> lin N (R.srdceN s) ** {pgen = p} ;
vysvedcenieN : Str -> N
= \s -> lin N (R.vysvedcenieN s) ;
dievcaN : Str -> N
= \s -> lin N (R.dievcaN s) ;
dievceniecN : Str -> N
= \s -> lin N (R.dievceniecN s) ;
-- The full definition of the noun record is -- The full definition of the noun record is
-- { -- {
@@ -92,27 +67,9 @@ oper
mkA = overload { mkA = overload {
mkA : Str -> A mkA : Str -> A
= \s -> lin A (guessAdjForms s) = \s -> lin A (velikA s)
} ; } ;
peknyA : Str -> A
= \s -> lin A (R.peknyA s) ;
krasnyA : Str -> A
= \s -> lin A (R.krasnyA s) ;
cudziA : Str -> A
= \s -> lin A (R.cudziA s) ;
rydziA : Str -> A
= \s -> lin A (R.rydziA s) ;
otcovA : Str -> A
= \s -> lin A (R.otcovA s) ;
paviA : Str -> A
= \s -> lin A (R.paviA s) ;
invarA : Str -> A
= \s -> lin A (invarAdjForms s) ;
mkA2 : A -> Prep -> A2
= \a,p -> lin A2 (a ** {c = p}) ;
-- the full definition of the adjective record is -- the full definition of the adjective record is
-- { -- {
@@ -125,12 +82,12 @@ oper
-- Verbs -- Verbs
mkV2 = overload { mkV2 = overload {
mkV2 : VerbForms -> VerbForms ** {c : ComplementCase} mkV2 : VerbForms -> V2
= \vf -> vf ** {c = {s = [] ; c = Acc ; hasPrep = False}} ; = \vf -> lin V2 {s = vf ; c = {s = [] ; c = Acc ; hasPrep = False}} ;
mkV2 : VerbForms -> Case -> VerbForms ** {c : ComplementCase} mkV2 : VerbForms -> Case -> V2
= \vf,c -> vf ** {c = {s = [] ; c = c ; hasPrep = False}} ; = \vf,c -> lin V2 {s = vf ; c = {s = [] ; c = c ; hasPrep = False}} ;
mkV2 : VerbForms -> ComplementCase -> VerbForms ** {c : ComplementCase} mkV2 : VerbForms -> ComplementCase -> V2
= \vf,c -> vf ** {c = c} ; = \vf,c -> lin V2 {s = vf ; c = c} ;
} ; } ;
------------------------ ------------------------

View File

@@ -143,29 +143,16 @@ voicing : Str -> Str = \s -> case s of {
_ => dubN (""+snom) ** {pgen = pgen} ---- Predef.error ("cannot infer declension type for" ++ snom ++ pgen) _ => dubN (""+snom) ** {pgen = pgen} ---- Predef.error ("cannot infer declension type for" ++ snom ++ pgen)
} ** {pgen = pgen ; g = g} ; } ** {pgen = pgen ; g = g} ;
-}
-- the "smartest" one-argument mkN -- the "smartest" one-argument mkN
guessNounForms : Str -> NounForms guessNounForms : Str -> NounForms ** {g : Gender}
= \snom -> case snom of { = \snom -> case snom of {
_ + ("i"|"y"|"e") => ponyN snom ;
_ + #softConsonant => strojN snom ;
_ + #hardConsonant => dubN snom ;
_ + #neutralConsonant => dubN snom ;
_ + #hardConsonant + "a" => zenaN snom ;
_ + #neutralConsonant + "a" => zenaN snom ;
_ + #softConsonant + "a" => ulicaN snom ;
_ + ("ia"|"ya") => ulicaN snom ;
_ + "o" => mestoN snom ;
_ + "ie" => vysvedcenieN snom ;
_ + "e" => srdceN snom ;
_ + "ä" => dievcaN snom ;
_ => dubN (""+snom) ---- Predef.error ("cannot guess declension type for" ++ snom) ---- TODO
_ => izvorN snom ** {g = inanimate}
} ; } ;
-}
-- the traditional declensions, following Wiki -- the traditional declensions, following Wiki
-- they are also exported in ParadigmsHrv with names izvorN etc -- they are also exported in ParadigmsHrv with names izvorN etc
@@ -666,12 +653,17 @@ oper
_ => adjAdj.s ! g ! n ! c _ => adjAdj.s ! g ! n ! c
} }
} ; } ;
-}
param NumSize = NS_1 | NS_2_4 | NS_5_20 | NS_20_ ;
oper
Determiner : Type = { Determiner : Type = {
s : Gender => Case => Str ; s : Gender => Case => Str ;
size : NumSize size : NumSize
} ; } ;
{-
mkDemPronForms : Str -> DemPronForms = \jedn -> { mkDemPronForms : Str -> DemPronForms = \jedn -> {
msnom = jedn + "y" ; -- should be "jeden" msnom = jedn + "y" ; -- should be "jeden"
fsnom = jedn + "a" ; fsnom = jedn + "a" ;
@@ -810,19 +802,17 @@ oper
regNumeral sto sto sto sto ; regNumeral sto sto sto sto ;
invarNumeral : Str -> Determiner = \s -> invarDeterminer s Num5 ; invarNumeral : Str -> Determiner = \s -> invarDeterminer s Num5 ;
-}
-------------------------------- --------------------------------
-- combining nouns with numerals -- combining nouns with numerals
param
NumSize = Num1 | Num2_4 | Num5 ; -- CEG 6.1
oper oper
numSizeForm : (Number => Case => Str) -> NumSize -> Case -> Str numSizeForm : (Number => Case => Str) -> NumSize -> Case -> Str
= \cns,n,c -> case n of { = \cns,n,c -> case n of {
Num1 => cns ! Sg ! c ; NS_1 => cns ! Sg ! c ;
Num2_4 => cns ! Pl ! c ; NS_2_4 => cns ! Pl ! c ;
Num5 => case c of { _ => case c of {
Nom | Acc => cns ! Pl ! Gen ; Nom | Acc => cns ! Pl ! Gen ;
_ => cns ! Pl ! c _ => cns ! Pl ! c
} }
@@ -830,14 +820,14 @@ oper
numSizeAgr : Gender -> NumSize -> Person -> Agr numSizeAgr : Gender -> NumSize -> Person -> Agr
= \g,ns,p -> case ns of { = \g,ns,p -> case ns of {
Num5 => Ag Neutr Sg p ; -- essential grammar 6.1.4 NS_1 => Ag g Sg p ;
Num2_4 => Ag g Pl p ; NS_2_4 => Ag g Pl p ;
Num1 => Ag g Sg p _ => Ag Neutr Sg p ---- TODO verify
} ; } ;
numSizeNumber : NumSize -> Number = \ns -> case ns of { numSizeNumber : NumSize -> Number = \ns -> case ns of {
Num1 => Sg ; NS_1 => Sg ;
_ => Pl ---- TO CHECK _ => Pl ---- TO CHECK
} ; } ;
-}
} }

View File

@@ -14,19 +14,19 @@ lin
} ; } ;
UseCl temp pol cl = { UseCl temp pol cl = {
s = temp.s ++ cl.subj ++ cl.clit ++ pol.s ++ verbAgr cl.verb cl.a pol.p ++ cl.compl ; s = temp.s ++ cl.subj ++ cl.clit ++ pol.s ++ verbAgr cl.verb cl.a CTPres ++ cl.compl ;
} ; } ; ---- TODO tense, negation
--- TODO is inversion the standard? ; add indirect questions --- TODO is inversion the standard? ; add indirect questions
UseQCl temp pol cl = { UseQCl temp pol cl = {
s = temp.s ++ cl.clit ++ pol.s ++ verbAgr cl.verb cl.a pol.p ++ cl.subj ++ cl.compl ; s = temp.s ++ cl.clit ++ pol.s ++ verbAgr cl.verb cl.a CTPres ++ cl.subj ++ cl.compl ;
} ; } ; ---- TODO tenses
UseRCl temp pol rcl = { UseRCl temp pol rcl = {
s = \\a => temp.s ++ s = \\a => temp.s ++
rcl.subj ! a ++ rcl.clit ! a ++ rcl.subj ! a ++ rcl.clit ! a ++
pol.s ++ verbAgr rcl.verb a pol.p ++ pol.s ++ verbAgr rcl.verb a CTPres ++
rcl.compl ! a ; rcl.compl ! a ;
} ; } ; ---- TODO tenses
} }

View File

@@ -3,22 +3,22 @@ concrete StructuralHrv of Structural = CatHrv **
lin lin
and_Conj = mkConj "a" ; and_Conj = mkConj "a" ;
by8agent_Prep = mkPrep "" Ins ; ---- by8agent_Prep = mkPrep "" Ins ;
few_Det = invarNumeral "málo" ; -- see notes ---- few_Det = invarNumeral "málo" ; -- see notes
for_Prep = mkPrep "pre" accusative ; for_Prep = mkPrep "pre" accusative ;
from_Prep = mkPrep (pre {"z" => "zo" ; _ => "z"}) Gen ; ---- consonant clusters and syllable with the onset with the same place of articulation from_Prep = mkPrep "iz" Gen ;
have_V2 = mkV2 haveVerbForms ; have_V2 = mkV2 imati_VerbForms ;
in_Prep = mkPrep (pre {"v" => "vo" ; _ => "v"}) Loc ; ---- in_Prep = mkPrep "u" Loc ;
many_Det = regNumeral "mnoho" "mnohých" "mnohým" "mnohými" ; ---- alternative: invarNumeral "veľa" ; ---- many_Det = regNumeral "mnoho" "mnohých" "mnohým" "mnohými" ; ---- alternative: invarNumeral "veľa" ;
or_Conj = mkConj "alebo" ; or_Conj = mkConj "alebo" ;
somePl_Det = invarDeterminer "niekoľko" Num5 ; ---- somePl_Det = invarDeterminer "niekoľko" Num5 ;
--- somePl_Det = {s = \\g,c => (demPronFormsAdjective (mkDemPronForms "niekoľko") "").s ! g ! Pl ! c ; size = Num5} ; ---- somePl_Det = {s = \\g,c => (demPronFormsAdjective (mkDemPronForms "niekoľko") "").s ! g ! Pl ! c ; size = Num5} ;
something_NP = {s,clit,prep = \\c => "nie" + coForms ! c ; a = Ag Neutr Sg P3 ; hasClit = False} ; -- CEG 5.6.3 ---- something_NP = {s,clit,prep = \\c => "nie" + coForms ! c ; a = Ag Neutr Sg P3 ; hasClit = False} ; -- CEG 5.6.3
possess_Prep = mkPrep "" Gen ; possess_Prep = mkPrep "" Gen ;
that_Quant = demPronFormsAdjective (tenDemPronForms "") "" ; ---- that_Quant = demPronFormsAdjective (tenDemPronForms "") "" ;
this_Quant = demPronFormsAdjective (tenDemPronForms "" ** {msgen = "toh"}) "to" ; ---- this_Quant = demPronFormsAdjective (tenDemPronForms "" ** {msgen = "toh"}) "to" ;
to_Prep = mkPrep "do" Gen ; to_Prep = mkPrep "u" Acc ;
with_Prep = mkPrep (pre {"s" => "so" ; _ => "s"}) Ins ; with_Prep = mkPrep (pre {"s"|"z"|"š"|"ž"|"mnom" => "sa" ; _ => "s"}) Ins ;
i_Pron = mkPron (Ag (Masc Anim) Sg P1) ; --- to add Fem pronouns in Extend i_Pron = mkPron (Ag (Masc Anim) Sg P1) ; --- to add Fem pronouns in Extend
youSg_Pron = mkPron (Ag (Masc Anim) Sg P2) ; youSg_Pron = mkPron (Ag (Masc Anim) Sg P2) ;

View File

@@ -2,7 +2,7 @@ concrete VerbHrv of Verb = CatHrv ** open ResHrv, Prelude in {
lin lin
UseV v = { UseV v = {
verb = v ; verb = v.s ;
clit,compl = \\_ => [] clit,compl = \\_ => []
} ; } ;
@@ -16,13 +16,13 @@ lin
} ; } ;
SlashV2a v = { SlashV2a v = {
verb = v ; verb = v.s ;
clit,compl = \\_ => [] ; clit,compl = \\_ => [] ;
c = v.c c = v.c
} ; } ;
UseComp comp = { UseComp comp = {
verb = copulaVerbForms ; verb = biti_VerbForms ; ---- TODO: jesam
clit = \\_ => [] ; clit = \\_ => [] ;
compl = comp.s compl = comp.s
} ; } ;

View File

@@ -1,6 +1,9 @@
import json import json
# https://kaikki.org/dictionary/rawdata.html # https://kaikki.org/dictionary/rawdata.html
# Tatu Ylonen: Wiktextract: Wiktionary as Machine-Readable Structured Data,
# Proceedings of the 13th Conference on Language Resources and Evaluation (LREC),
# pp. 1317-1325, Marseille, 20-25 June 2022.
FILE = 'data/raw-wiktextract-data.json' FILE = 'data/raw-wiktextract-data.json'