From ad716539d450e9b61070051bb7de99a5ad697663 Mon Sep 17 00:00:00 2001
From: krasimir
Date: Wed, 10 Feb 2016 18:12:48 +0000
Subject: [PATCH] add a skeletal morphology for Slovenian and a script for importing the SLOLEKS lexicon to GF

---
 lib/src/slovenian/CatSlv.gf       |  13 ++
 lib/src/slovenian/ParadigmsSlv.gf | 306 ++++++++++++++++++++++++++++++
 lib/src/slovenian/ResSlv.gf       |  32 ++++
 lib/src/slovenian/convert.py      | 240 +++++++++++++++++++++++
 4 files changed, 591 insertions(+)
 create mode 100644 lib/src/slovenian/CatSlv.gf
 create mode 100644 lib/src/slovenian/ParadigmsSlv.gf
 create mode 100644 lib/src/slovenian/ResSlv.gf
 create mode 100644 lib/src/slovenian/convert.py

diff --git a/lib/src/slovenian/CatSlv.gf b/lib/src/slovenian/CatSlv.gf
new file mode 100644
--- /dev/null
+++ b/lib/src/slovenian/CatSlv.gf
@@ -0,0 +1,13 @@
+-- Linearization types of the Slovenian lexical categories.
+concrete CatSlv of Cat = open ResSlv in {
+
+lincat
+  -- Nouns and proper names inflect for case and number and have a gender.
+  N = {s : Case => Number => Str; g : Gender};
+  PN = {s : Case => Number => Str; g : Gender};
+
+  A = {s : AForm => Str};
+
+  V = {s : VForm => Str};
+
+}
diff --git a/lib/src/slovenian/ParadigmsSlv.gf b/lib/src/slovenian/ParadigmsSlv.gf
new file mode 100644
--- /dev/null
+++ b/lib/src/slovenian/ParadigmsSlv.gf
@@ -0,0 +1,306 @@
+-- Worst-case paradigms for Slovenian.  Each constructor takes every
+-- inflected form of a word explicitly and stores the forms in the
+-- inflection table of the category.
+resource ParadigmsSlv = open CatSlv, ResSlv in {
+
+oper
+  masculine = Masc;
+  feminine = Fem;
+  neuter = Neut;
+
+  -- Common noun from its 18 forms and its gender.
+  mkN : (nomsg,nomdl,nompl : Str) -> (gensg,gendl,genpl : Str) ->
+        (datsg,datdl,datpl : Str) -> (accsg,accdl,accpl : Str) ->
+        (locsg,locdl,locpl : Str) -> (instrsg,instrdl,instrpl : Str) ->
+        Gender -> N =
+    \nomsg,nomdl,nompl,gensg,gendl,genpl,datsg,datdl,datpl,accsg,accdl,accpl,locsg,locdl,locpl,instrsg,instrdl,instrpl,g -> lin N {
+      s = table {
+            Nom => table {Sg=>nomsg; Dl=>nomdl; Pl=>nompl};
+            Gen => table {Sg=>gensg; Dl=>gendl; Pl=>genpl};
+            Dat => table {Sg=>datsg; Dl=>datdl; Pl=>datpl};
+            Acc => table {Sg=>accsg; Dl=>accdl; Pl=>accpl};
+            Loc => table {Sg=>locsg; Dl=>locdl; Pl=>locpl};
+            Instr => table {Sg=>instrsg; Dl=>instrdl; Pl=>instrpl}
+          };
+      g = g
+    };
+
+  -- Proper name from its 18 forms and its gender.
+  mkPN : (nomsg,nomdl,nompl : Str) -> (gensg,gendl,genpl : Str) ->
+         (datsg,datdl,datpl : Str) -> (accsg,accdl,accpl : Str) ->
+         (locsg,locdl,locpl : Str) -> (instrsg,instrdl,instrpl : Str) ->
+         Gender -> PN =
+    \nomsg,nomdl,nompl,gensg,gendl,genpl,datsg,datdl,datpl,accsg,accdl,accpl,locsg,locdl,locpl,instrsg,instrdl,instrpl,g -> lin PN {
+      s = table {
+            Nom => table {Sg=>nomsg; Dl=>nomdl; Pl=>nompl};
+            Gen => table {Sg=>gensg; Dl=>gendl; Pl=>genpl};
+            Dat => table {Sg=>datsg; Dl=>datdl; Pl=>datpl};
+            Acc => table {Sg=>accsg; Dl=>accdl; Pl=>accpl};
+            Loc => table {Sg=>locsg; Dl=>locdl; Pl=>locpl};
+            Instr => table {Sg=>instrsg; Dl=>instrdl; Pl=>instrpl}
+          };
+      g = g
+    };
+
+  -- Verb from its 25 forms: infinitive, supine, the participle in three
+  -- genders and numbers, the present in three numbers and persons, and
+  -- five imperative forms.
+  -- NOTE(review): imp1dl and imp1pl are stored under VImper1Sg and
+  -- VImper1Dl, so the names of these two constructors do not match the
+  -- forms they hold.
+  mkV : (inf,sup : Str) ->
+        (partsgm,partdlm,partplm : Str) ->
+        (partsgf,partdlf,partplf : Str) ->
+        (partsgn,partdln,partpln : Str) ->
+        (pres1sg,pres2sg,pres3sg : Str) ->
+        (pres1dl,pres2dl,pres3dl : Str) ->
+        (pres1pl,pres2pl,pres3pl : Str) ->
+        (imp1dl,imp1pl : Str) ->
+        (imp2sg,imp2dl,imp2pl : Str) -> V =
+    \inf,sup,partsgm,partdlm,partplm,partsgf,partdlf,partplf,partsgn,partdln,partpln,pres1sg,pres2sg,pres3sg,pres1dl,pres2dl,pres3dl,pres1pl,pres2pl,pres3pl,imp1dl,imp1pl,imp2sg,imp2dl,imp2pl -> lin V {
+      s = table {
+            VInf => inf;
+            VSup => sup;
+            VPastPart Masc Sg => partsgm;
+            VPastPart Masc Dl => partdlm;
+            VPastPart Masc Pl => partplm;
+            VPastPart Fem Sg => partsgf;
+            VPastPart Fem Dl => partdlf;
+            VPastPart Fem Pl => partplf;
+            VPastPart Neut Sg => partsgn;
+            VPastPart Neut Dl => partdln;
+            VPastPart Neut Pl => partpln;
+            VPres Sg P1 => pres1sg;
+            VPres Sg P2 => pres2sg;
+            VPres Sg P3 => pres3sg;
+            VPres Dl P1 => pres1dl;
+            VPres Dl P2 => pres2dl;
+            VPres Dl P3 => pres3dl;
+            VPres Pl P1 => pres1pl;
+            VPres Pl P2 => pres2pl;
+            VPres Pl P3 => pres3pl;
+            VImper1Sg => imp1dl;
+            VImper1Dl => imp1pl;
+            VImper2 Sg => imp2sg;
+            VImper2 Dl => imp2dl;
+            VImper2 Pl => imp2pl
+          }
+    };
+
+  -- Adjective from its 167 forms: 57 positive, 55 comparative and 55
+  -- superlative.  Within a degree the forms are ordered by gender, then
+  -- number, then case; the masculine singular has additional forms.
+  mkA : (_,_,_,_,_,_,_,_,_ : Str) ->  -- positive Masc Sg
+        (_,_,_,_,_,_ : Str) -> (_,_,_,_,_,_ : Str) ->  -- positive Masc Dl, Pl
+        (_,_,_,_,_,_ : Str) -> (_,_,_,_,_,_ : Str) -> (_,_,_,_,_,_ : Str) ->  -- positive Fem Sg, Dl, Pl
+        (_,_,_,_,_,_ : Str) -> (_,_,_,_,_,_ : Str) -> (_,_,_,_,_,_ : Str) ->  -- positive Neut Sg, Dl, Pl
+        (_,_,_,_,_,_,_ : Str) ->  -- comparative Masc Sg
+        (_,_,_,_,_,_ : Str) -> (_,_,_,_,_,_ : Str) ->  -- comparative Masc Dl, Pl
+        (_,_,_,_,_,_ : Str) -> (_,_,_,_,_,_ : Str) -> (_,_,_,_,_,_ : Str) ->  -- comparative Fem Sg, Dl, Pl
+        (_,_,_,_,_,_ : Str) -> (_,_,_,_,_,_ : Str) -> (_,_,_,_,_,_ : Str) ->  -- comparative Neut Sg, Dl, Pl
+        (_,_,_,_,_,_,_ : Str) ->  -- superlative Masc Sg
+        (_,_,_,_,_,_ : Str) -> (_,_,_,_,_,_ : Str) ->  -- superlative Masc Dl, Pl
+        (_,_,_,_,_,_ : Str) -> (_,_,_,_,_,_ : Str) -> (_,_,_,_,_,_ : Str) ->  -- superlative Fem Sg, Dl, Pl
+        (_,_,_,_,_,_ : Str) -> (_,_,_,_,_,_ : Str) -> (_,_,_,_,_,_ : Str) ->  -- superlative Neut Sg, Dl, Pl
+        A =
+    \positMSgNom,positMSgNomDef,positMSgGen,positMSgDat,positMSgAcc,positMSgAccIndef,positMSgAccDef,positMSgLoc,positMSgInstr,
+     positMDlNom,positMDlGen,positMDlDat,positMDlAcc,positMDlLoc,positMDlInstr,
+     positMPlNom,positMPlGen,positMPlDat,positMPlAcc,positMPlLoc,positMPlInstr,
+     positFSgNom,positFSgGen,positFSgDat,positFSgAcc,positFSgLoc,positFSgInstr,
+     positFDlNom,positFDlGen,positFDlDat,positFDlAcc,positFDlLoc,positFDlInstr,
+     positFPlNom,positFPlGen,positFPlDat,positFPlAcc,positFPlLoc,positFPlInstr,
+     positNSgNom,positNSgGen,positNSgDat,positNSgAcc,positNSgLoc,positNSgInstr,
+     positNDlNom,positNDlGen,positNDlDat,positNDlAcc,positNDlLoc,positNDlInstr,
+     positNPlNom,positNPlGen,positNPlDat,positNPlAcc,positNPlLoc,positNPlInstr,
+     comparMSgNom,comparMSgGen,comparMSgDat,comparMSgAcc,comparMSgAccDef,comparMSgLoc,comparMSgInstr,
+     comparMDlNom,comparMDlGen,comparMDlDat,comparMDlAcc,comparMDlLoc,comparMDlInstr,
+     comparMPlNom,comparMPlGen,comparMPlDat,comparMPlAcc,comparMPlLoc,comparMPlInstr,
+     comparFSgNom,comparFSgGen,comparFSgDat,comparFSgAcc,comparFSgLoc,comparFSgInstr,
+     comparFDlNom,comparFDlGen,comparFDlDat,comparFDlAcc,comparFDlLoc,comparFDlInstr,
+     comparFPlNom,comparFPlGen,comparFPlDat,comparFPlAcc,comparFPlLoc,comparFPlInstr,
+     comparNSgNom,comparNSgGen,comparNSgDat,comparNSgAcc,comparNSgLoc,comparNSgInstr,
+     comparNDlNom,comparNDlGen,comparNDlDat,comparNDlAcc,comparNDlLoc,comparNDlInstr,
+     comparNPlNom,comparNPlGen,comparNPlDat,comparNPlAcc,comparNPlLoc,comparNPlInstr,
+     superlMSgNom,superlMSgGen,superlMSgDat,superlMSgAcc,superlMSgAccDef,superlMSgLoc,superlMSgInstr,
+     superlMDlNom,superlMDlGen,superlMDlDat,superlMDlAcc,superlMDlLoc,superlMDlInstr,
+     superlMPlNom,superlMPlGen,superlMPlDat,superlMPlAcc,superlMPlLoc,superlMPlInstr,
+     superlFSgNom,superlFSgGen,superlFSgDat,superlFSgAcc,superlFSgLoc,superlFSgInstr,
+     superlFDlNom,superlFDlGen,superlFDlDat,superlFDlAcc,superlFDlLoc,superlFDlInstr,
+     superlFPlNom,superlFPlGen,superlFPlDat,superlFPlAcc,superlFPlLoc,superlFPlInstr,
+     superlNSgNom,superlNSgGen,superlNSgDat,superlNSgAcc,superlNSgLoc,superlNSgInstr,
+     superlNDlNom,superlNDlGen,superlNDlDat,superlNDlAcc,superlNDlLoc,superlNDlInstr,
+     superlNPlNom,superlNPlGen,superlNPlDat,superlNPlAcc,superlNPlLoc,superlNPlInstr -> lin A {
+      s = table {
+            APosit Masc Sg Nom => positMSgNom;
+            APositDefNom => positMSgNomDef;
+            APosit Masc Sg Gen => positMSgGen;
+            APosit Masc Sg Dat => positMSgDat;
+            APosit Masc Sg Acc => positMSgAcc;
+            APositIndefAcc => positMSgAccIndef;
+            APositDefAcc => positMSgAccDef;
+            APosit Masc Sg Loc => positMSgLoc;
+            APosit Masc Sg Instr => positMSgInstr;
+            APosit Masc Dl Nom => positMDlNom;
+            APosit Masc Dl Gen => positMDlGen;
+            APosit Masc Dl Dat => positMDlDat;
+            APosit Masc Dl Acc => positMDlAcc;
+            APosit Masc Dl Loc => positMDlLoc;
+            APosit Masc Dl Instr => positMDlInstr;
+            APosit Masc Pl Nom => positMPlNom;
+            APosit Masc Pl Gen => positMPlGen;
+            APosit Masc Pl Dat => positMPlDat;
+            APosit Masc Pl Acc => positMPlAcc;
+            APosit Masc Pl Loc => positMPlLoc;
+            APosit Masc Pl Instr => positMPlInstr;
+            APosit Fem Sg Nom => positFSgNom;
+            APosit Fem Sg Gen => positFSgGen;
+            APosit Fem Sg Dat => positFSgDat;
+            APosit Fem Sg Acc => positFSgAcc;
+            APosit Fem Sg Loc => positFSgLoc;
+            APosit Fem Sg Instr => positFSgInstr;
+            APosit Fem Dl Nom => positFDlNom;
+            APosit Fem Dl Gen => positFDlGen;
+            APosit Fem Dl Dat => positFDlDat;
+            APosit Fem Dl Acc => positFDlAcc;
+            APosit Fem Dl Loc => positFDlLoc;
+            APosit Fem Dl Instr => positFDlInstr;
+            APosit Fem Pl Nom => positFPlNom;
+            APosit Fem Pl Gen => positFPlGen;
+            APosit Fem Pl Dat => positFPlDat;
+            APosit Fem Pl Acc => positFPlAcc;
+            APosit Fem Pl Loc => positFPlLoc;
+            APosit Fem Pl Instr => positFPlInstr;
+            APosit Neut Sg Nom => positNSgNom;
+            APosit Neut Sg Gen => positNSgGen;
+            APosit Neut Sg Dat => positNSgDat;
+            APosit Neut Sg Acc => positNSgAcc;
+            APosit Neut Sg Loc => positNSgLoc;
+            APosit Neut Sg Instr => positNSgInstr;
+            APosit Neut Dl Nom => positNDlNom;
+            APosit Neut Dl Gen => positNDlGen;
+            APosit Neut Dl Dat => positNDlDat;
+            APosit Neut Dl Acc => positNDlAcc;
+            APosit Neut Dl Loc => positNDlLoc;
+            APosit Neut Dl Instr => positNDlInstr;
+            APosit Neut Pl Nom => positNPlNom;
+            APosit Neut Pl Gen => positNPlGen;
+            APosit Neut Pl Dat => positNPlDat;
+            APosit Neut Pl Acc => positNPlAcc;
+            APosit Neut Pl Loc => positNPlLoc;
+            APosit Neut Pl Instr => positNPlInstr;
+
+            ACompar Masc Sg Nom => comparMSgNom;
+            ACompar Masc Sg Gen => comparMSgGen;
+            ACompar Masc Sg Dat => comparMSgDat;
+            ACompar Masc Sg Acc => comparMSgAcc;
+            AComparDefAcc => comparMSgAccDef;
+            ACompar Masc Sg Loc => comparMSgLoc;
+            ACompar Masc Sg Instr => comparMSgInstr;
+            ACompar Masc Dl Nom => comparMDlNom;
+            ACompar Masc Dl Gen => comparMDlGen;
+            ACompar Masc Dl Dat => comparMDlDat;
+            ACompar Masc Dl Acc => comparMDlAcc;
+            ACompar Masc Dl Loc => comparMDlLoc;
+            ACompar Masc Dl Instr => comparMDlInstr;
+            ACompar Masc Pl Nom => comparMPlNom;
+            ACompar Masc Pl Gen => comparMPlGen;
+            ACompar Masc Pl Dat => comparMPlDat;
+            ACompar Masc Pl Acc => comparMPlAcc;
+            ACompar Masc Pl Loc => comparMPlLoc;
+            ACompar Masc Pl Instr => comparMPlInstr;
+            ACompar Fem Sg Nom => comparFSgNom;
+            ACompar Fem Sg Gen => comparFSgGen;
+            ACompar Fem Sg Dat => comparFSgDat;
+            ACompar Fem Sg Acc => comparFSgAcc;
+            ACompar Fem Sg Loc => comparFSgLoc;
+            ACompar Fem Sg Instr => comparFSgInstr;
+            ACompar Fem Dl Nom => comparFDlNom;
+            ACompar Fem Dl Gen => comparFDlGen;
+            ACompar Fem Dl Dat => comparFDlDat;
+            ACompar Fem Dl Acc => comparFDlAcc;
+            ACompar Fem Dl Loc => comparFDlLoc;
+            ACompar Fem Dl Instr => comparFDlInstr;
+            ACompar Fem Pl Nom => comparFPlNom;
+            ACompar Fem Pl Gen => comparFPlGen;
+            ACompar Fem Pl Dat => comparFPlDat;
+            ACompar Fem Pl Acc => comparFPlAcc;
+            ACompar Fem Pl Loc => comparFPlLoc;
+            ACompar Fem Pl Instr => comparFPlInstr;
+            ACompar Neut Sg Nom => comparNSgNom;
+            ACompar Neut Sg Gen => comparNSgGen;
+            ACompar Neut Sg Dat => comparNSgDat;
+            ACompar Neut Sg Acc => comparNSgAcc;
+            ACompar Neut Sg Loc => comparNSgLoc;
+            ACompar Neut Sg Instr => comparNSgInstr;
+            ACompar Neut Dl Nom => comparNDlNom;
+            ACompar Neut Dl Gen => comparNDlGen;
+            ACompar Neut Dl Dat => comparNDlDat;
+            ACompar Neut Dl Acc => comparNDlAcc;
+            ACompar Neut Dl Loc => comparNDlLoc;
+            ACompar Neut Dl Instr => comparNDlInstr;
+            ACompar Neut Pl Nom => comparNPlNom;
+            ACompar Neut Pl Gen => comparNPlGen;
+            ACompar Neut Pl Dat => comparNPlDat;
+            ACompar Neut Pl Acc => comparNPlAcc;
+            ACompar Neut Pl Loc => comparNPlLoc;
+            ACompar Neut Pl Instr => comparNPlInstr;
+
+            ASuperl Masc Sg Nom => superlMSgNom;
+            ASuperl Masc Sg Gen => superlMSgGen;
+            ASuperl Masc Sg Dat => superlMSgDat;
+            ASuperl Masc Sg Acc => superlMSgAcc;
+            ASuperlDefAcc => superlMSgAccDef;
+            ASuperl Masc Sg Loc => superlMSgLoc;
+            ASuperl Masc Sg Instr => superlMSgInstr;
+            ASuperl Masc Dl Nom => superlMDlNom;
+            ASuperl Masc Dl Gen => superlMDlGen;
+            ASuperl Masc Dl Dat => superlMDlDat;
+            ASuperl Masc Dl Acc => superlMDlAcc;
+            ASuperl Masc Dl Loc => superlMDlLoc;
+            ASuperl Masc Dl Instr => superlMDlInstr;
+            ASuperl Masc Pl Nom => superlMPlNom;
+            ASuperl Masc Pl Gen => superlMPlGen;
+            ASuperl Masc Pl Dat => superlMPlDat;
+            ASuperl Masc Pl Acc => superlMPlAcc;
+            ASuperl Masc Pl Loc => superlMPlLoc;
+            ASuperl Masc Pl Instr => superlMPlInstr;
+            ASuperl Fem Sg Nom => superlFSgNom;
+            ASuperl Fem Sg Gen => superlFSgGen;
+            ASuperl Fem Sg Dat => superlFSgDat;
+            ASuperl Fem Sg Acc => superlFSgAcc;
+            ASuperl Fem Sg Loc => superlFSgLoc;
+            ASuperl Fem Sg Instr => superlFSgInstr;
+            ASuperl Fem Dl Nom => superlFDlNom;
+            ASuperl Fem Dl Gen => superlFDlGen;
+            ASuperl Fem Dl Dat => superlFDlDat;
+            ASuperl Fem Dl Acc => superlFDlAcc;
+            ASuperl Fem Dl Loc => superlFDlLoc;
+            ASuperl Fem Dl Instr => superlFDlInstr;
+            ASuperl Fem Pl Nom => superlFPlNom;
+            ASuperl Fem Pl Gen => superlFPlGen;
+            ASuperl Fem Pl Dat => superlFPlDat;
+            ASuperl Fem Pl Acc => superlFPlAcc;
+            ASuperl Fem Pl Loc => superlFPlLoc;
+            ASuperl Fem Pl Instr => superlFPlInstr;
+            ASuperl Neut Sg Nom => superlNSgNom;
+            ASuperl Neut Sg Gen => superlNSgGen;
+            ASuperl Neut Sg Dat => superlNSgDat;
+            ASuperl Neut Sg Acc => superlNSgAcc;
+            ASuperl Neut Sg Loc => superlNSgLoc;
+            ASuperl Neut Sg Instr => superlNSgInstr;
+            ASuperl Neut Dl Nom => superlNDlNom;
+            ASuperl Neut Dl Gen => superlNDlGen;
+            ASuperl Neut Dl Dat => superlNDlDat;
+            ASuperl Neut Dl Acc => superlNDlAcc;
+            ASuperl Neut Dl Loc => superlNDlLoc;
+            ASuperl Neut Dl Instr => superlNDlInstr;
+            ASuperl Neut Pl Nom => superlNPlNom;
+            ASuperl Neut Pl Gen => superlNPlGen;
+            ASuperl Neut Pl Dat => superlNPlDat;
+            ASuperl Neut Pl Acc => superlNPlAcc;
+            ASuperl Neut Pl Loc => superlNPlLoc;
+            ASuperl Neut Pl Instr => superlNPlInstr
+          }
+    };
+
+}
diff --git a/lib/src/slovenian/ResSlv.gf b/lib/src/slovenian/ResSlv.gf
new file mode 100644
--- /dev/null
+++ b/lib/src/slovenian/ResSlv.gf
@@ -0,0 +1,32 @@
+-- Parameter types shared by the Slovenian grammar modules.
+resource ResSlv = {
+
+param
+  Case = Nom | Gen | Dat | Acc | Loc | Instr;
+  Number = Sg | Dl | Pl ;
+  Gender = Masc | Fem | Neut ;
+  Person = P1 | P2 | P3 ;
+  Species = Indef | Def ;
+
+  -- NOTE(review): ParadigmsSlv.mkV fills VImper1Sg with its imp1dl
+  -- argument and VImper1Dl with its imp1pl argument.
+  VForm = VInf
+        | VSup
+        | VPastPart Gender Number
+        | VPres Number Person
+        | VImper1Sg
+        | VImper1Dl
+        | VImper2 Number ;
+
+  -- The last five constructors hold the additional masculine singular
+  -- forms that mkA takes besides the gender/number/case forms.
+  AForm = APosit Gender Number Case
+        | ACompar Gender Number Case
+        | ASuperl Gender Number Case
+        | APositDefNom
+        | APositIndefAcc
+        | APositDefAcc
+        | AComparDefAcc
+        | ASuperlDefAcc ;
+
+}
diff --git a/lib/src/slovenian/convert.py b/lib/src/slovenian/convert.py
new file mode 100644
--- /dev/null
+++ b/lib/src/slovenian/convert.py
@@ -0,0 +1,240 @@
+# coding=utf-8
+"""Convert the Sloleks lexicon (LMF XML) into a GF dictionary.
+
+Reads Sloleks_v1.2.xml from the current directory and writes the abstract
+syntax DictSlvAbs.gf and the concrete syntax DictSlv.gf.  An entry is
+produced for every proper name, common noun, main verb and general
+adjective; each entry applies the matching paradigm from ParadigmsSlv to
+the forms listed in the lexicon.
+"""
+
+from __future__ import unicode_literals
+
+import io
+import xml.sax
+
+# Letters of the Sloleks MSD tags, in the order of the GF parameter values.
+CASES = "irdtmo"    # Nom, Gen, Dat, Acc, Loc, Instr
+NUMBERS = "edm"     # Sg, Dl, Pl
+GENDERS = "mzs"     # Masc, Fem, Neut
+GENDER_OPERS = {"m": "masculine", "z": "feminine", "s": "neuter"}
+
+# Tag suffixes of the 25 verb forms, in the argument order of mkV.
+VERB_SUFFIXES = [
+    "n", "m",
+    "d-em", "d-dm", "d-mm", "d-ez", "d-dz", "d-mz", "d-es", "d-ds", "d-ms",
+    "spe", "sde", "ste", "spd", "sdd", "std", "spm", "sdm", "stm",
+    "vpd", "vpm", "vde", "vdd", "vdm",
+]
+
+# Tag suffixes of the masculine singular adjective forms.  The positive
+# degree has more of them than the comparative and the superlative.
+POSITIVE_MASC_SG = ["in", "id", "r", "d", "t", "tn", "td", "m", "o"]
+OTHER_MASC_SG = ["id", "r", "d", "t", "td", "m", "o"]
+
+
+def adjective_tags():
+    """Return the 167 adjective tags in the argument order of mkA."""
+    tags = []
+    for degree, masc_sg in (("Ppn", POSITIVE_MASC_SG),
+                            ("Ppp", OTHER_MASC_SG),
+                            ("Pps", OTHER_MASC_SG)):
+        for gender in GENDERS:
+            for number in NUMBERS:
+                if gender + number == "me":
+                    suffixes = masc_sg
+                else:
+                    suffixes = CASES
+                tags.extend(degree + gender + number + s for s in suffixes)
+    return tags
+
+
+# The adjective tags are the same for every entry, so build them once.
+ADJECTIVE_TAGS = adjective_tags()
+
+
+def gf_string(form):
+    """Return form as a GF string literal, escaping backslashes and quotes."""
+    return '"' + form.replace("\\", "\\\\").replace('"', '\\"') + '"'
+
+
+class SloleksHandler(xml.sax.ContentHandler):
+    """SAX handler that writes one GF entry per supported lexical entry.
+
+    The constructor opens the abstract and the concrete syntax files;
+    call close() after parsing to terminate and close them.
+    """
+
+    def __init__(self, abs_path="DictSlvAbs.gf", cnc_path="DictSlv.gf"):
+        xml.sax.ContentHandler.__init__(self)
+        self.parents = []   # names of the currently open elements
+        self.lemma = None   # written form of the most recent lemma
+        self.pos = None     # "besedna_vrsta" feature of the entry
+        self.pos2 = None    # "vrsta" feature of the entry
+        self.msd = None     # "msd" feature of the most recent word form
+        self.forms = None   # msd tag -> list of written forms
+
+        self.ids = set()    # identifiers handed out by gen_id
+
+        self.absf = io.open(abs_path, "w", encoding="utf-8")
+        try:
+            self.cncf = io.open(cnc_path, "w", encoding="utf-8")
+        except Exception:
+            self.absf.close()
+            raise
+        self.absf.write("abstract DictSlvAbs = Cat ** {\n")
+        self.absf.write("fun\n")
+        self.cncf.write("concrete DictSlv of DictSlvAbs = CatSlv ** "
+                        "open ParadigmsSlv, Prelude in {\n")
+        self.cncf.write("lin\n")
+
+    def close(self):
+        """Write the closing braces and close both output files."""
+        try:
+            self.absf.write("}")
+            self.cncf.write("}")
+        finally:
+            self.absf.close()
+            self.cncf.close()
+
+    def gen_id(self, lemma, tag):
+        """Return a fresh GF identifier of the form lemma_N_tag.
+
+        The lemma is lower-cased and N is the smallest positive number that
+        makes the identifier unique.  An identifier with characters outside
+        a-z is wrapped in single quotes, and the single quotes and
+        backslashes of the lemma are escaped with a backslash.
+        """
+        base = ""
+        quote = False
+        for c in lemma.lower():
+            if c < "a" or c > "z":
+                quote = True
+            if c == "'" or c == "\\":
+                base += "\\"
+            base += c
+        i = 1
+        while True:
+            ident = base + "_" + str(i) + "_" + tag
+            if quote:
+                ident = "'" + ident + "'"
+            if ident not in self.ids:
+                self.ids.add(ident)
+                return ident
+            i += 1
+
+    def startElement(self, name, attrs):
+        """Record the lemma, the part of speech and the word forms."""
+        if name == "LexicalEntry":
+            self.forms = {}
+            self.pos = None
+            self.pos2 = None
+        elif name == "feat":
+            parent = self.parents[-1] if self.parents else None
+            att = attrs["att"]
+            if att == "zapis_oblike":
+                if parent == "Lemma":
+                    self.lemma = attrs["val"]
+                elif parent == "FormRepresentation":
+                    self.forms.setdefault(self.msd, []).append(attrs["val"])
+            elif att == "besedna_vrsta" and parent == "LexicalEntry":
+                self.pos = attrs["val"]
+            elif att == "vrsta" and parent == "LexicalEntry":
+                self.pos2 = attrs["val"]
+            elif att == "msd" and parent == "WordForm":
+                self.msd = attrs["val"]
+        self.parents.append(name)
+
+    def endElement(self, name):
+        """Emit the GF entry once a supported LexicalEntry is complete."""
+        self.parents.pop()
+        if name != "LexicalEntry":
+            return
+        if self.pos2 == "lastno_ime":
+            self._emit_noun("PN", "mkPN", "Sl")
+        elif self.pos2 == "občno_ime":
+            self._emit_noun("N", "mkN", "So")
+        elif self.pos == "glagol" and self.pos2 == "glavni":
+            self._emit_verb()
+        elif self.pos == "pridevnik" and self.pos2 == "splošni":
+            self._emit("A", "mkA", ADJECTIVE_TAGS)
+
+    def _emit_noun(self, cat, oper, base):
+        """Emit a noun entry; base is "Sl" for PN and "So" for N."""
+        if base + "mei" in self.forms:
+            gender = "m"
+        elif base + "zei" in self.forms:
+            gender = "z"
+        else:
+            gender = "s"
+        prefix = base + gender
+        # mkN and mkPN take the singular, dual and plural of one case before
+        # the next case, so the case letter varies slowest.
+        tags = [prefix + number + case for case in CASES for number in NUMBERS]
+        if gender == "m":
+            # The masculine accusative singular is tagged ...etn or ...etd
+            # rather than ...et; ...etn is used when the entry has it.
+            if prefix + "etn" in self.forms:
+                acc_sg = prefix + "etn"
+            else:
+                acc_sg = prefix + "etd"
+            tags[tags.index(prefix + "et")] = acc_sg
+        self._emit(cat, oper, tags, GENDER_OPERS[gender])
+
+    def _emit_verb(self):
+        """Emit a verb entry from the Ggv, Ggn or Ggd forms of the entry."""
+        if "Ggvn" in self.forms:
+            prefix = "Ggv"
+        elif "Ggnn" in self.forms:
+            prefix = "Ggn"
+        else:
+            prefix = "Ggd"
+        self._emit("V", "mkV", [prefix + suffix for suffix in VERB_SUFFIXES])
+
+    def _emit(self, cat, oper, tags, gender=None):
+        """Write the abstract and the concrete rule of the current entry.
+
+        Each argument of oper is the form stored under the matching tag, or
+        nonExist when the entry has no such form.  If some tag has several
+        forms, one variant per alternative is written; a tag with fewer
+        alternatives repeats its last form.
+        """
+        ident = self.gen_id(self.lemma, cat)
+        self.absf.write(" " + ident + " : " + cat + " ;\n")
+
+        # At least one variant, so that an entry without forms stays valid GF.
+        variant_count = max([1] + [len(fs) for fs in self.forms.values()])
+        variants = []
+        for i in range(variant_count):
+            args = []
+            for msd in tags:
+                fs = self.forms.get(msd)
+                if fs:
+                    args.append(gf_string(fs[min(i, len(fs) - 1)]))
+                else:
+                    args.append("nonExist")
+            if gender is not None:
+                args.append(gender)
+            variants.append(oper + " " + " ".join(args) + " ")
+        separator = "\n" + " " * (len(ident) + 2) + "| "
+        self.cncf.write(" " + ident + " = " + separator.join(variants) + ";\n")
+
+
+# Former name of the handler class, kept for backward compatibility.
+InkscapeSvgHandler = SloleksHandler
+
+
+def main():
+    """Convert Sloleks_v1.2.xml in the current directory."""
+    parser = xml.sax.make_parser()
+    handler = SloleksHandler()
+    parser.setContentHandler(handler)
+    try:
+        with io.open("Sloleks_v1.2.xml", "rb") as source:
+            parser.parse(source)
+    finally:
+        handler.close()
+
+
+if __name__ == "__main__":
+    main()